[llvm] 9c319d5 - [AArch64] Enable subreg liveness tracking by default.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 12 08:08:49 PST 2024


Author: Sander de Smalen
Date: 2024-12-12T16:05:49Z
New Revision: 9c319d5bb40785c969d2af76535ca62448dfafa7

URL: https://github.com/llvm/llvm-project/commit/9c319d5bb40785c969d2af76535ca62448dfafa7
DIFF: https://github.com/llvm/llvm-project/commit/9c319d5bb40785c969d2af76535ca62448dfafa7.diff

LOG: [AArch64] Enable subreg liveness tracking by default.

Internal testing didn't flag up any functional- or performance regressions.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64Subtarget.cpp
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
    llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
    llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
    llvm/test/CodeGen/AArch64/aarch64-addv.ll
    llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
    llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
    llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
    llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
    llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
    llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
    llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
    llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
    llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
    llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
    llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
    llvm/test/CodeGen/AArch64/aarch64-mops.ll
    llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
    llvm/test/CodeGen/AArch64/aarch64-mulv.ll
    llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
    llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
    llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
    llvm/test/CodeGen/AArch64/abs.ll
    llvm/test/CodeGen/AArch64/active_lane_mask.ll
    llvm/test/CodeGen/AArch64/adc.ll
    llvm/test/CodeGen/AArch64/add-extract.ll
    llvm/test/CodeGen/AArch64/add.ll
    llvm/test/CodeGen/AArch64/addimm-mulimm.ll
    llvm/test/CodeGen/AArch64/addp-shuffle.ll
    llvm/test/CodeGen/AArch64/addsub_ext.ll
    llvm/test/CodeGen/AArch64/and-mask-removal.ll
    llvm/test/CodeGen/AArch64/andorxor.ll
    llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
    llvm/test/CodeGen/AArch64/arm64-addp.ll
    llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
    llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
    llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
    llvm/test/CodeGen/AArch64/arm64-build-vector.ll
    llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
    llvm/test/CodeGen/AArch64/arm64-dup.ll
    llvm/test/CodeGen/AArch64/arm64-ext.ll
    llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
    llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
    llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
    llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
    llvm/test/CodeGen/AArch64/arm64-fmax.ll
    llvm/test/CodeGen/AArch64/arm64-fp128.ll
    llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
    llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
    llvm/test/CodeGen/AArch64/arm64-ld1.ll
    llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
    llvm/test/CodeGen/AArch64/arm64-mul.ll
    llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
    llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
    llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
    llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
    llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
    llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
    llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
    llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
    llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
    llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
    llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
    llvm/test/CodeGen/AArch64/arm64-nvcast.ll
    llvm/test/CodeGen/AArch64/arm64-popcnt.ll
    llvm/test/CodeGen/AArch64/arm64-rev.ll
    llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
    llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/test/CodeGen/AArch64/arm64-stp.ll
    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
    llvm/test/CodeGen/AArch64/arm64-tbl.ll
    llvm/test/CodeGen/AArch64/arm64-vadd.ll
    llvm/test/CodeGen/AArch64/arm64-vaddv.ll
    llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
    llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
    llvm/test/CodeGen/AArch64/arm64-vmul.ll
    llvm/test/CodeGen/AArch64/arm64-zip.ll
    llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
    llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
    llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
    llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
    llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
    llvm/test/CodeGen/AArch64/atomic-ops.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
    llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
    llvm/test/CodeGen/AArch64/bf16-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-select.ll
    llvm/test/CodeGen/AArch64/bf16-shuffle.ll
    llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
    llvm/test/CodeGen/AArch64/bitcast.ll
    llvm/test/CodeGen/AArch64/bitfield-insert.ll
    llvm/test/CodeGen/AArch64/bswap.ll
    llvm/test/CodeGen/AArch64/build-one-lane.ll
    llvm/test/CodeGen/AArch64/build-vector-extract.ll
    llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
    llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
    llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
    llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/test/CodeGen/AArch64/combine-andintoload.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
    llvm/test/CodeGen/AArch64/concat-vector.ll
    llvm/test/CodeGen/AArch64/concatbinop.ll
    llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
    llvm/test/CodeGen/AArch64/dag-numsignbits.ll
    llvm/test/CodeGen/AArch64/dup.ll
    llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
    llvm/test/CodeGen/AArch64/ext-narrow-index.ll
    llvm/test/CodeGen/AArch64/extbinopload.ll
    llvm/test/CodeGen/AArch64/extract-bits.ll
    llvm/test/CodeGen/AArch64/extract-insert.ll
    llvm/test/CodeGen/AArch64/extract-lowbits.ll
    llvm/test/CodeGen/AArch64/extract-sext-zext.ll
    llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
    llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
    llvm/test/CodeGen/AArch64/extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/f16-instructions.ll
    llvm/test/CodeGen/AArch64/fabs-fp128.ll
    llvm/test/CodeGen/AArch64/fabs.ll
    llvm/test/CodeGen/AArch64/faddp-half.ll
    llvm/test/CodeGen/AArch64/faddp.ll
    llvm/test/CodeGen/AArch64/faddsub.ll
    llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
    llvm/test/CodeGen/AArch64/fast-isel-gep.ll
    llvm/test/CodeGen/AArch64/fast-isel-shift.ll
    llvm/test/CodeGen/AArch64/fcmp.ll
    llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
    llvm/test/CodeGen/AArch64/fcopysign.ll
    llvm/test/CodeGen/AArch64/fcvt.ll
    llvm/test/CodeGen/AArch64/fcvt_combine.ll
    llvm/test/CodeGen/AArch64/fdiv-combine.ll
    llvm/test/CodeGen/AArch64/fdiv.ll
    llvm/test/CodeGen/AArch64/fexplog.ll
    llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
    llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
    llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
    llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
    llvm/test/CodeGen/AArch64/fminimummaximum.ll
    llvm/test/CodeGen/AArch64/fminmax.ll
    llvm/test/CodeGen/AArch64/fmla.ll
    llvm/test/CodeGen/AArch64/fmul.ll
    llvm/test/CodeGen/AArch64/fneg.ll
    llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
    llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
    llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
    llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
    llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
    llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
    llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
    llvm/test/CodeGen/AArch64/fpext.ll
    llvm/test/CodeGen/AArch64/fpmode.ll
    llvm/test/CodeGen/AArch64/fpow.ll
    llvm/test/CodeGen/AArch64/fpowi.ll
    llvm/test/CodeGen/AArch64/fptoi.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptrunc.ll
    llvm/test/CodeGen/AArch64/freeze.ll
    llvm/test/CodeGen/AArch64/frem-power2.ll
    llvm/test/CodeGen/AArch64/frem.ll
    llvm/test/CodeGen/AArch64/fsincos.ll
    llvm/test/CodeGen/AArch64/fsqrt.ll
    llvm/test/CodeGen/AArch64/funnel-shift.ll
    llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
    llvm/test/CodeGen/AArch64/get_vector_length.ll
    llvm/test/CodeGen/AArch64/half.ll
    llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
    llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
    llvm/test/CodeGen/AArch64/icmp.ll
    llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
    llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
    llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
    llvm/test/CodeGen/AArch64/insert-extend.ll
    llvm/test/CodeGen/AArch64/insert-subvector.ll
    llvm/test/CodeGen/AArch64/insertextract.ll
    llvm/test/CodeGen/AArch64/insertshuffleload.ll
    llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
    llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
    llvm/test/CodeGen/AArch64/itofp-bf16.ll
    llvm/test/CodeGen/AArch64/itofp.ll
    llvm/test/CodeGen/AArch64/ldexp.ll
    llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
    llvm/test/CodeGen/AArch64/llrint-conv.ll
    llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
    llvm/test/CodeGen/AArch64/llvm.exp10.ll
    llvm/test/CodeGen/AArch64/llvm.frexp.ll
    llvm/test/CodeGen/AArch64/llvm.sincos.ll
    llvm/test/CodeGen/AArch64/load.ll
    llvm/test/CodeGen/AArch64/logic-shift.ll
    llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
    llvm/test/CodeGen/AArch64/lrint-conv.ll
    llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
    llvm/test/CodeGen/AArch64/lslfast.ll
    llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
    llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
    llvm/test/CodeGen/AArch64/memset-inline.ll
    llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
    llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
    llvm/test/CodeGen/AArch64/mla_mls_merge.ll
    llvm/test/CodeGen/AArch64/mul.ll
    llvm/test/CodeGen/AArch64/neon-bitcast.ll
    llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
    llvm/test/CodeGen/AArch64/neon-dot-product.ll
    llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
    llvm/test/CodeGen/AArch64/neon-extract.ll
    llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
    llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
    llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
    llvm/test/CodeGen/AArch64/neon-luti.ll
    llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
    llvm/test/CodeGen/AArch64/neon-perm.ll
    llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
    llvm/test/CodeGen/AArch64/neon-rshrn.ll
    llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
    llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
    llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
    llvm/test/CodeGen/AArch64/neon-truncstore.ll
    llvm/test/CodeGen/AArch64/neon-vcmla.ll
    llvm/test/CodeGen/AArch64/neon-wide-splat.ll
    llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
    llvm/test/CodeGen/AArch64/nontemporal-load.ll
    llvm/test/CodeGen/AArch64/nontemporal.ll
    llvm/test/CodeGen/AArch64/phi.ll
    llvm/test/CodeGen/AArch64/popcount.ll
    llvm/test/CodeGen/AArch64/pow.ll
    llvm/test/CodeGen/AArch64/pr-cf624b2.ll
    llvm/test/CodeGen/AArch64/pr58350.ll
    llvm/test/CodeGen/AArch64/pr58431.ll
    llvm/test/CodeGen/AArch64/pr61111.ll
    llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
    llvm/test/CodeGen/AArch64/ptradd.ll
    llvm/test/CodeGen/AArch64/qmovn.ll
    llvm/test/CodeGen/AArch64/rcpc3.ll
    llvm/test/CodeGen/AArch64/reduce-and.ll
    llvm/test/CodeGen/AArch64/reduce-or.ll
    llvm/test/CodeGen/AArch64/reduce-shuffle.ll
    llvm/test/CodeGen/AArch64/reduce-xor.ll
    llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
    llvm/test/CodeGen/AArch64/rem.ll
    llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
    llvm/test/CodeGen/AArch64/select-constant-xor.ll
    llvm/test/CodeGen/AArch64/seqpairspill.mir
    llvm/test/CodeGen/AArch64/setcc_knownbits.ll
    llvm/test/CodeGen/AArch64/sext.ll
    llvm/test/CodeGen/AArch64/shift-amount-mod.ll
    llvm/test/CodeGen/AArch64/shift-by-signext.ll
    llvm/test/CodeGen/AArch64/shift-mod.ll
    llvm/test/CodeGen/AArch64/shift.ll
    llvm/test/CodeGen/AArch64/shift_minsize.ll
    llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
    llvm/test/CodeGen/AArch64/shuffles.ll
    llvm/test/CodeGen/AArch64/shufflevector.ll
    llvm/test/CodeGen/AArch64/sink-and-fold.ll
    llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
    llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
    llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
    llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
    llvm/test/CodeGen/AArch64/sme-streaming-body.ll
    llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
    llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
    llvm/test/CodeGen/AArch64/smul_fix_sat.ll
    llvm/test/CodeGen/AArch64/spill-fold.mir
    llvm/test/CodeGen/AArch64/split-vector-insert.ll
    llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
    llvm/test/CodeGen/AArch64/store.ll
    llvm/test/CodeGen/AArch64/sub.ll
    llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
    llvm/test/CodeGen/AArch64/sve-doublereduct.ll
    llvm/test/CodeGen/AArch64/sve-extract-element.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
    llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
    llvm/test/CodeGen/AArch64/sve-fadda-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
    llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
    llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
    llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
    llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
    llvm/test/CodeGen/AArch64/sve-insert-element.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/CodeGen/AArch64/sve-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve-merging-stores.ll
    llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
    llvm/test/CodeGen/AArch64/sve-pr62151.ll
    llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
    llvm/test/CodeGen/AArch64/sve-select.ll
    llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
    llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
    llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
    llvm/test/CodeGen/AArch64/sve-vector-compress.ll
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll
    llvm/test/CodeGen/AArch64/sve-vl-arith.ll
    llvm/test/CodeGen/AArch64/sve2-histcnt.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
    llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
    llvm/test/CodeGen/AArch64/tbl-loops.ll
    llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
    llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
    llvm/test/CodeGen/AArch64/umul_fix_sat.ll
    llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
    llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
    llvm/test/CodeGen/AArch64/vec-libcalls.ll
    llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-add.ll
    llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-bool.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
    llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
    llvm/test/CodeGen/AArch64/vector-compress.ll
    llvm/test/CodeGen/AArch64/vector-fcopysign.ll
    llvm/test/CodeGen/AArch64/vector-fcvt.ll
    llvm/test/CodeGen/AArch64/vector-llrint.ll
    llvm/test/CodeGen/AArch64/vector-lrint.ll
    llvm/test/CodeGen/AArch64/vldn_shuffle.ll
    llvm/test/CodeGen/AArch64/win64-fpowi.ll
    llvm/test/CodeGen/AArch64/win64_vararg.ll
    llvm/test/CodeGen/AArch64/xtn.ll
    llvm/test/CodeGen/AArch64/zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 3767b34bd5b0c5..cd4cd0fdd3747a 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -90,7 +90,7 @@ static cl::alias AArch64StreamingStackHazardSize(
 // are ironed out. This option allows the feature to be used in tests.
 static cl::opt<bool>
     EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
-                                 cl::init(false), cl::Hidden,
+                                 cl::init(true), cl::Hidden,
                                  cl::desc("Enable subreg liveness tracking"));
 
 static cl::opt<bool>

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index a1712a5ec7a27c..444f579f232420 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    casp x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspa x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspl x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index ee5fbe39b4492c..62af028defde56 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    casp x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspa x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspl x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3552,7 +3552,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lt
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, ge
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, lo
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x7, x3, hs
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index 83e383f335637c..f043f99327308b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    casp x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspa x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspl x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6

diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index 0c3ed9b0f1de0f..df7b57e7e18f46 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -576,7 +576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x7, x3
+; -O1:    adds x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x7, x3
+; -O1:    subs x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
+; -O1:    and x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    casp x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspa x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspl x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x7, x3
-; -O1:    mvn x10, x8
-; -O1:    mvn x11, x9
-; -O1:    caspal x4, x5, x10, x11, [x0]
+; -O1:    and x9, x5, x3
+; -O1:    mvn x8, x8
+; -O1:    mvn x9, x9
+; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x7, x3
+; -O1:    orr x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3593,7 +3593,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x7, x3
+; -O1:    eor x9, x5, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4141,8 +4141,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lt
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4705,8 +4705,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, ge
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, lo
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5938,8 +5938,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x7
-; -O1:    csel x9, x7, x3, hs
+; -O1:    cmp x3, x5
+; -O1:    csel x9, x5, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index 1fe63c9be8c629..80310a11add697 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -53,10 +53,6 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap:
 ; CHECK-CAS-O1:       // %bb.0:
-; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -180,10 +176,6 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-CAS-O1:       // %bb.0:
-; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -307,10 +299,6 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire:
 ; CHECK-CAS-O1:       // %bb.0:
-; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -434,10 +422,6 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic:
 ; CHECK-CAS-O1:       // %bb.0:
-; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -658,10 +642,6 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_return:
 ; CHECK-CAS-O1:       // %bb.0:
-; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov x0, x2
 ; CHECK-CAS-O1-NEXT:    mov x1, x3

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index de3f323891a36a..842dd353bc6b22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1938,7 +1938,6 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
 define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB28_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
@@ -2993,7 +2992,6 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
 define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB38_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
@@ -4054,7 +4052,6 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    stxr w9, w1, [x8]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB48_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i32:
@@ -5996,7 +5993,6 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB67_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w0, [x8]
@@ -6009,12 +6005,10 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB67_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB67_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i8:
@@ -6103,7 +6097,6 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB68_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w0, [x8]
@@ -6116,12 +6109,10 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB68_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB68_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i16:
@@ -6221,12 +6212,10 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB69_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB69_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i32:
@@ -6561,25 +6550,36 @@ define internal float @bitcast_to_float(ptr %ptr) {
 }
 
 define internal half @bitcast_to_half(ptr %ptr) {
-; CHECK-NOLSE-LABEL: bitcast_to_half:
-; CHECK-NOLSE:       ; %bb.0:
-; CHECK-NOLSE-NEXT:    ldarh w8, [x0]
-; CHECK-NOLSE-NEXT:    fmov s0, w8
-; CHECK-NOLSE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
-; CHECK-NOLSE-NEXT:    ret
+; CHECK-NOLSE-O1-LABEL: bitcast_to_half:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldarh w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    fmov s0, w8
+; CHECK-NOLSE-O1-NEXT:    ret
 ;
-; CHECK-OUTLINE-LABEL: bitcast_to_half:
-; CHECK-OUTLINE:       ; %bb.0:
-; CHECK-OUTLINE-NEXT:    ldarh w8, [x0]
-; CHECK-OUTLINE-NEXT:    fmov s0, w8
-; CHECK-OUTLINE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
-; CHECK-OUTLINE-NEXT:    ret
+; CHECK-OUTLINE-O1-LABEL: bitcast_to_half:
+; CHECK-OUTLINE-O1:       ; %bb.0:
+; CHECK-OUTLINE-O1-NEXT:    ldarh w8, [x0]
+; CHECK-OUTLINE-O1-NEXT:    fmov s0, w8
+; CHECK-OUTLINE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: bitcast_to_half:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldarh w8, [x0]
+; CHECK-NOLSE-O0-NEXT:    fmov s0, w8
+; CHECK-NOLSE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-OUTLINE-O0-LABEL: bitcast_to_half:
+; CHECK-OUTLINE-O0:       ; %bb.0:
+; CHECK-OUTLINE-O0-NEXT:    ldarh w8, [x0]
+; CHECK-OUTLINE-O0-NEXT:    fmov s0, w8
+; CHECK-OUTLINE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-OUTLINE-O0-NEXT:    ret
 ;
 ; CHECK-LSE-O1-LABEL: bitcast_to_half:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    ldarh w8, [x0]
 ; CHECK-LSE-O1-NEXT:    fmov s0, w8
-; CHECK-LSE-O1-NEXT:    ; kill: def $h0 killed $h0 killed $s0
 ; CHECK-LSE-O1-NEXT:    ret
 ;
 ; CHECK-LSE-O0-LABEL: bitcast_to_half:

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff39ed33e..7efc31f0cf2597 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,13 +12,13 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
@@ -26,14 +26,14 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0
   %val = extractvalue { i32, i1 } %pair, 0
@@ -46,19 +46,19 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, pcsections !0 :: (load (s32) from %ir.pnew)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x9
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x9:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x8, $x9
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x8:0x0000000000000040, $x9:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRW renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1
@@ -66,14 +66,14 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %new = load i32, ptr %pnew, !pcsections !0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0
@@ -91,13 +91,13 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STLXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
@@ -105,14 +105,14 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic, !pcsections !0
   %val = extractvalue { i32, i1 } %pair, 0
@@ -243,16 +243,16 @@ define i32 @fetch_and_nand(ptr %p) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
   ; CHECK-NEXT:   $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %val = atomicrmw nand ptr %p, i32 7 release, !pcsections !0
   ret i32 %val
@@ -295,15 +295,15 @@ define i32 @fetch_and_or(ptr %p) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %val = atomicrmw or ptr %p, i32 5 seq_cst, !pcsections !0
   ret i32 %val
@@ -726,15 +726,15 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst, !pcsections !0
   ret i8 %res
@@ -746,20 +746,18 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT:   liveins: $x0, $x1:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic, !pcsections !0
   ret i8 %res
@@ -775,15 +773,15 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw sub ptr %ptr, i8 %rhs acquire, !pcsections !0
   ret i8 %res
@@ -799,15 +797,15 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw and ptr %ptr, i8 %rhs release, !pcsections !0
   ret i8 %res
@@ -823,15 +821,15 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw or ptr %ptr, i8 %rhs seq_cst, !pcsections !0
   ret i8 %res
@@ -847,15 +845,15 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xor ptr %ptr, i8 %rhs monotonic, !pcsections !0
   ret i8 %res
@@ -871,17 +869,17 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw min ptr %ptr, i8 %rhs acquire, !pcsections !0
   ret i8 %res
@@ -897,17 +895,17 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw max ptr %ptr, i8 %rhs release, !pcsections !0
   ret i8 %res
@@ -925,11 +923,11 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 7
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -953,11 +951,11 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 7
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRB killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -979,15 +977,15 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst, !pcsections !0
   ret i16 %res
@@ -999,20 +997,18 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT:   liveins: $x0, $x1:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic, !pcsections !0
   ret i16 %res
@@ -1028,15 +1024,15 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw sub ptr %ptr, i16 %rhs acquire, !pcsections !0
   ret i16 %res
@@ -1052,15 +1048,15 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw and ptr %ptr, i16 %rhs release, !pcsections !0
   ret i16 %res
@@ -1076,15 +1072,15 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw or ptr %ptr, i16 %rhs seq_cst, !pcsections !0
   ret i16 %res
@@ -1100,15 +1096,15 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xor ptr %ptr, i16 %rhs monotonic, !pcsections !0
   ret i16 %res
@@ -1124,17 +1120,17 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw min ptr %ptr, i16 %rhs acquire, !pcsections !0
   ret i16 %res
@@ -1150,17 +1146,17 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw max ptr %ptr, i16 %rhs release, !pcsections !0
   ret i16 %res
@@ -1178,11 +1174,11 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 15
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1206,11 +1202,11 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 15
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRH killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1229,37 +1225,34 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
-  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x2:0x0000000000000040, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x0:0x0000000000000040, $x2:0x0000000000000040, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
   ; CHECK-NEXT:   CLREX 15, pcsections !0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0
   ret { i8, i1 } %res
@@ -1272,37 +1265,34 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
-  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x2:0x0000000000000040, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x0:0x0000000000000040, $x2:0x0000000000000040, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
   ; CHECK-NEXT:   CLREX 15, pcsections !0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0
   ret { i16, i1 } %res

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index b681e3b2231174..0157c429120a9e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -252,7 +252,6 @@ define i32 @udiv_div_by_180(i32 %x)
 ; SDAG-NEXT:    movk w8, #364, lsl #16
 ; SDAG-NEXT:    umull x8, w9, w8
 ; SDAG-NEXT:    lsr x0, x8, #32
-; SDAG-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SDAG-NEXT:    ret
 ;
 ; GISEL-LABEL: udiv_div_by_180:
@@ -262,7 +261,6 @@ define i32 @udiv_div_by_180(i32 %x)
 ; GISEL-NEXT:    movk w8, #364, lsl #16
 ; GISEL-NEXT:    umull x8, w9, w8
 ; GISEL-NEXT:    lsr x0, x8, #32
-; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; GISEL-NEXT:    ret
 {
   %truncate = and i32 %x, 255

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll b/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
index e41b25fb2b88f1..1422e32c468bf9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
@@ -8,10 +8,8 @@ target triple = "arm64-apple-ios14.5.0"
 define <2 x i16> @pr63826_v2s16(<2 x i16> %vec) {
 ; CHECK-LABEL: pr63826_v2s16:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.s v0[0], w8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <2 x i16> %vec, i16 1, i32 0
   ret <2 x i16> %vec1
@@ -20,10 +18,8 @@ define <2 x i16> @pr63826_v2s16(<2 x i16> %vec) {
 define <2 x i8> @pr63826_v2s8(<2 x i8> %vec) {
 ; CHECK-LABEL: pr63826_v2s8:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.s v0[0], w8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <2 x i8> %vec, i8 1, i32 0
   ret <2 x i8> %vec1
@@ -32,10 +28,8 @@ define <2 x i8> @pr63826_v2s8(<2 x i8> %vec) {
 define <4 x i8> @pr63826_v4s8(<4 x i8> %vec) {
 ; CHECK-LABEL: pr63826_v4s8:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.h v0[0], w8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <4 x i8> %vec, i8 1, i32 0
   ret <4 x i8> %vec1

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index def4192b0e005d..a711588ab82c18 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -330,7 +330,6 @@ entry:
 define i16 @addv_v3i16(<3 x i16> %a) {
 ; CHECK-LABEL: addv_v3i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], wzr
 ; CHECK-NEXT:    addv h0, v0.4h
 ; CHECK-NEXT:    fmov w0, s0
@@ -434,9 +433,6 @@ entry:
 define i64 @addv_v3i64(<3 x i64> %a) {
 ; CHECK-LABEL: addv_v3i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v2.d[1], xzr
 ; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
index 4afe3626864401..ddd19e61cdd9bb 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -1035,7 +1035,6 @@ define <2 x double> @test_v1f64(<1 x double> %0, ptr %1) {
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.2s, #31, msl #16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll b/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
index 52b542790e82d5..86cc39696f10ca 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -24,7 +24,6 @@ entry:
 define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfdot_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfdot v0.2s, v1.4h, v2.2h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -64,7 +63,6 @@ entry:
 define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfdotq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfdot v0.4s, v1.8h, v2.2h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -108,7 +106,6 @@ entry:
 define <4 x float> @test_vbfmlalbq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlalbq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfmlalb v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -131,7 +128,6 @@ entry:
 define <4 x float> @test_vbfmlaltq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlaltq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfmlalt v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
index 9734ab35bd6b2d..6da3743cf86f5d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
@@ -14,9 +14,6 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.b[0]
 ; CHECK-GI-NEXT:    umov w9, v1.b[0]
 ; CHECK-GI-NEXT:    umov w10, v0.b[0]
@@ -42,9 +39,6 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w10, v0.h[0]
@@ -77,7 +71,6 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ; CHECK-GI-NEXT:    and w8, w8, w10
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %neg = xor <1 x i32> %C, <i32 -1>
   %and = and <1 x i32> %neg, %B

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 45ad4b07ff66f7..28ef5deb3bd38a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -14,9 +14,6 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.b[0]
 ; CHECK-GI-NEXT:    umov w9, v1.b[0]
 ; CHECK-GI-NEXT:    umov w10, v0.b[0]
@@ -42,9 +39,6 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w10, v0.h[0]
@@ -77,7 +71,6 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ; CHECK-GI-NEXT:    bic w8, w10, w8
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %and = and <1 x i32> %C, %B
   %neg = xor <1 x i32> %C, <i32 -1>

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
index e086ab92421fb5..4c77090da14c6e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
@@ -5,11 +5,7 @@ define <2 x i64> @test_mul_add_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_2x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <2 x i64> %b, %c
   %add = add <2 x i64> %a, %mul
@@ -20,11 +16,7 @@ define <1 x i64> @test_mul_add_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_1x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <1 x i64> %b, %c
   %add = add <1 x i64> %mul, %a
@@ -35,11 +27,7 @@ define <2 x i64> @test_mul_sub_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_sub_2x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mls z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <2 x i64> %b, %c
   %sub = sub <2 x i64> %a, %mul
@@ -50,10 +38,6 @@ define <2 x i64> @test_mul_sub_2x64_2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,
 ; CHECK-LABEL: test_mul_sub_2x64_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z3.d
@@ -69,13 +53,8 @@ define <2 x i64> @test_mul_sub_2x64_3(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,
 ; CHECK-LABEL: test_mul_sub_2x64_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z3.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %div = sdiv <2 x i64> %a, %b
   %mul = mul <2 x i64> %c, %d
@@ -87,8 +66,6 @@ define <1 x i64> @test_mul_sub_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) {
 ; CHECK-LABEL: test_mul_sub_1x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
 ; CHECK-NEXT:    sub d0, d1, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
index b1b995931ac024..59b98e84aa7d6e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
@@ -4,8 +4,6 @@
 define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv h0, v0.16b
 ; CHECK-NEXT:    umov w0, v0.h[0]
@@ -20,8 +18,6 @@ define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0
 define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv s0, v0.8h
 ; CHECK-NEXT:    fmov w0, s0
@@ -36,8 +32,6 @@ define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #
 define i64 @test_add_zext_v2i64(<2 x i32> %a, <2 x i32> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x0, d0

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
index 36b81d8e495ce6..8a6e7e4841f9cc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
@@ -84,7 +84,6 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i8_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtb x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -133,7 +132,6 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i16_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxth x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -150,7 +148,6 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i32_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -247,7 +244,6 @@ entry:
 define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupzext_v2i8_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xff
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -296,7 +292,6 @@ entry:
 define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupzext_v2i16_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xffff
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 0ce92a20fb3a17..0d450a9a9a5c44 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -106,7 +106,6 @@ define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-GI-NEXT:    dup v1.2d, x8
@@ -203,7 +202,6 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
 ; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    and x8, x0, #0xffff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    dup v1.2d, x8
@@ -408,10 +406,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -461,10 +459,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
-; CHECK-GI-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index 888aa9d7f9cdcc..e9e4bca74cd4ee 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -15,7 +15,6 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data)
 define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
 ; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.b, z0.b[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <16 x i8> %data, i8 1
@@ -27,7 +26,6 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
 define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
 ; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.b, z0.b[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x i8> %data, i8 1
@@ -50,7 +48,6 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data)
 define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
 ; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x i16> %data, i16 1
@@ -62,7 +59,6 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
 define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
 ; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x i16> %data, i16 1
@@ -85,7 +81,6 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data)
 define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
 ; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x i32> %data, i32 1
@@ -97,7 +92,6 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
 define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
 ; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x i32> %data, i32 1
@@ -120,7 +114,6 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data)
 define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
 ; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x i64> %data, i64 1
@@ -178,7 +171,6 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -190,7 +182,6 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
 define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -236,7 +227,6 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -248,7 +238,6 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
 define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -293,7 +282,6 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f16_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -305,7 +293,6 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
 define <vscale x 2 x half> @dup_extract_nxv2f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f16_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -340,7 +327,6 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %d
 define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f32_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x float> %data, i32 1
@@ -352,7 +338,6 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
 define <vscale x 4 x float> @dup_extract_nxv4f32_v2f32(<2 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f32_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x float> %data, i32 1
@@ -386,7 +371,6 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_nxv2f32(<vscale x 2 x float> %d
 define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f32_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x float> %data, i32 1
@@ -398,7 +382,6 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
 define <vscale x 2 x float> @dup_extract_nxv2f32_v2f32(<2 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f32_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x float> %data, i32 1
@@ -421,7 +404,6 @@ define <vscale x 2 x double> @dup_extract_nxv2f64_nxv2f64(<vscale x 2 x double>
 define <vscale x 2 x double> @dup_extract_nxv2f64_v2f64(<2 x double> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f64_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x double> %data, i64 1
@@ -479,7 +461,6 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv8bf16_v8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -491,7 +472,6 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv8bf16_v4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1
@@ -537,7 +517,6 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv4bf16_v8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -549,7 +528,6 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv4bf16_v4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1
@@ -594,7 +572,6 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv2bf16_v8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -606,7 +583,6 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv2bf16_v4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 63dcafed2320a0..bd8ad9c0161936 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,7 +13,6 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: halfword:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x8, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -43,7 +42,6 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: halfword:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -83,7 +81,6 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: word:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -113,7 +110,6 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: word:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -153,7 +149,6 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: doubleword:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -183,7 +178,6 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: doubleword:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -231,7 +225,6 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
 ; CHECK0-SDAG-NEXT:    .cfi_offset w21, -24
 ; CHECK0-SDAG-NEXT:    .cfi_offset w22, -32
 ; CHECK0-SDAG-NEXT:    .cfi_offset w30, -48
-; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x8, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    mov x19, x0
 ; CHECK0-SDAG-NEXT:    lsl x21, x8, #1
@@ -277,7 +270,6 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
 ; CHECK3-SDAG-NEXT:    .cfi_offset w21, -24
 ; CHECK3-SDAG-NEXT:    .cfi_offset w22, -32
 ; CHECK3-SDAG-NEXT:    .cfi_offset w30, -48
-; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    mov x19, x0
 ; CHECK3-SDAG-NEXT:    ldrh w20, [x0, x21, lsl #1]

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
index 07fbe5d7310f60..7141f53802bff7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
@@ -27,9 +27,8 @@ BB:
 define void @f_undef_15(<8 x i64> %a, ptr %dst) {
 ; CHECK-LABEL: f_undef_15:
 ; CHECK:       // %bb.0: // %BB
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-NEXT:    mov x8, x0
 ; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov x8, x0
 ; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8], #32
 ; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8]
 ; CHECK-NEXT:    add x8, x0, #64
@@ -46,19 +45,17 @@ BB:
 define void @f_undef_1(<8 x i64> %a, ptr %dst) {
 ; CHECK-LABEL: f_undef_1:
 ; CHECK:       // %bb.0: // %BB
-; CHECK-NEXT:    mov v16.16b, v0.16b
-; CHECK-NEXT:    mov v5.16b, v2.16b
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q1_q2
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q3_q4
+; CHECK-NEXT:    mov v4.16b, v2.16b
+; CHECK-NEXT:    mov v5.16b, v0.16b
 ; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    mov v6.16b, v0.16b
 ; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    mov v4.16b, v3.16b
-; CHECK-NEXT:    mov v17.16b, v16.16b
-; CHECK-NEXT:    mov v6.16b, v5.16b
-; CHECK-NEXT:    st2 { v16.2d, v17.2d }, [x8], #32
+; CHECK-NEXT:    st2 { v5.2d, v6.2d }, [x8], #32
+; CHECK-NEXT:    mov v5.16b, v4.16b
 ; CHECK-NEXT:    st2 { v1.2d, v2.2d }, [x8]
 ; CHECK-NEXT:    add x8, x0, #64
-; CHECK-NEXT:    st2 { v5.2d, v6.2d }, [x8]
+; CHECK-NEXT:    st2 { v4.2d, v5.2d }, [x8]
+; CHECK-NEXT:    mov v4.16b, v3.16b
 ; CHECK-NEXT:    add x8, x0, #96
 ; CHECK-NEXT:    st2 { v3.2d, v4.2d }, [x8]
 ; CHECK-NEXT:    ret
@@ -73,11 +70,10 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
 ; CHECK-LABEL: noundefs:
 ; CHECK:       // %bb.0: // %BB
 ; CHECK-NEXT:    mov v5.16b, v2.16b
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q2_q3
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    mov v2.16b, v3.16b
 ; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT:    st2 { v2.4s, v3.4s }, [x0]
+; CHECK-NEXT:    st2 { v1.4s, v2.4s }, [x0]
 ; CHECK-NEXT:    ret
 BB:
   %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 undef, i32 14, i32 7, i32 15>
@@ -89,11 +85,10 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
 ; CHECK-LABEL: undefs:
 ; CHECK:       // %bb.0: // %BB
 ; CHECK-NEXT:    mov v5.16b, v2.16b
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q2_q3
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    mov v2.16b, v3.16b
 ; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT:    st2 { v2.4s, v3.4s }, [x0]
+; CHECK-NEXT:    st2 { v1.4s, v2.4s }, [x0]
 ; CHECK-NEXT:    ret
 BB:
   %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 15>

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index 317feb5ad9ad08..16479a526ab0a2 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -6,7 +6,6 @@ define <2 x i16> @test0(ptr %i16_ptr, i64 %inc) {
 ; CHECK-LE-LABEL: test0:
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test0:
@@ -25,7 +24,6 @@ define <2 x i16> @test1(ptr %v2i16_ptr) {
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, #2
 ; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test1:
@@ -45,7 +43,6 @@ define <2 x i16> @test2(ptr %i16_ptr, i64 %inc) {
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, x1, lsl #1
 ; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test2:
@@ -69,7 +66,6 @@ define <2 x i8> @test3(ptr %v2i8_ptr) {
 ; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, #1
 ; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test3:
@@ -88,7 +84,6 @@ define <4 x i8> @test4(ptr %v4i8_ptr) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test4:
@@ -109,7 +104,6 @@ define <2 x i32> @fsext_v2i32(ptr %a) {
 ; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v2i32:
@@ -253,7 +247,6 @@ define <2 x i16> @fsext_v2i16(ptr %a) {
 ; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v2i16:
@@ -297,7 +290,6 @@ define <4 x i16> @fsext_v4i16(ptr %a) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v4i16:
@@ -359,7 +351,6 @@ define <4 x i16> @fzext_v4i16(ptr %a) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fzext_v4i16:
@@ -456,7 +447,6 @@ define <4 x i8> @strict_align_aligned(ptr %v4i8_ptr) "target-features"="+strict-
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: strict_align_aligned:
@@ -480,7 +470,6 @@ define <4 x i8> @strict_align_unaligned(ptr %v4i8_ptr) "target-features"="+stric
 ; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
 ; CHECK-LE-NEXT:    add x8, x0, #3
 ; CHECK-LE-NEXT:    ld1 { v0.b }[6], [x8]
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: strict_align_unaligned:

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index fb6575cc0ee83b..6f03e14bbbe8ed 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s --check-prefix=CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=CHECK-GI
 
 define void @matrix_mul_unsigned(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i16 %val) {
 ; CHECK-SD-LABEL: matrix_mul_unsigned:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -91,7 +90,6 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
 ; CHECK-SD-LABEL: matrix_mul_signed:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -110,7 +108,6 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
 ; CHECK-GI-LABEL: matrix_mul_signed:
 ; CHECK-GI:       // %bb.0: // %vector.header
 ; CHECK-GI-NEXT:    sxth w9, w3
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    dup v0.4s, w9
 ; CHECK-GI-NEXT:    and x8, x8, #0xfffffff8
@@ -178,9 +175,7 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
 ; CHECK-SD-LABEL: matrix_mul_double_shuffle:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
 ; CHECK-SD-NEXT:  .LBB2_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldrh w9, [x2], #16
@@ -757,7 +752,6 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-SD-LABEL: sink_v2z64_1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov x8, xzr
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:  .LBB6_1: // %loop
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldr d1, [x0]
@@ -823,7 +817,6 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-SD-LABEL: sink_v4i64_1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov x8, xzr
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:  .LBB7_1: // %loop
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldr q1, [x0]
@@ -1024,7 +1017,6 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea
 ; CHECK-SD-LABEL: matrix_mul_unsigned_and:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB10_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -1109,7 +1101,6 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
 ; CHECK-SD-LABEL: matrix_mul_unsigned_and_double:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.8h, w3
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff0
 ; CHECK-SD-NEXT:  .LBB11_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -1204,7 +1195,6 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado
 ; CHECK-SD-LABEL: matrix_mul_signed_and:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    and w9, w3, #0xffff
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB12_1: // %vector.body
@@ -1292,7 +1282,6 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur
 ; CHECK-SD-LABEL: matrix_mul_signed_and_double:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    and w9, w3, #0xffff
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff0
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB13_1: // %vector.body
@@ -1389,6 +1378,3 @@ for.end12:                                        ; preds = %vector.body
 }
 
 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index f7aa57a068a4ce..53e07eff1541d6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -458,7 +458,6 @@ define i8 @sminv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxtb w8, w8
@@ -510,7 +509,6 @@ define i8 @sminv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -575,7 +573,6 @@ define i16 @sminv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxth w8, w8
@@ -592,7 +589,6 @@ entry:
 define i16 @sminv_v3i16(<3 x i16> %a) {
 ; CHECK-LABEL: sminv_v3i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    mov v0.h[3], w8
 ; CHECK-NEXT:    sminv h0, v0.4h
@@ -710,10 +706,7 @@ entry:
 define i64 @sminv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: sminv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmgt v1.2d, v2.2d, v0.2d
@@ -726,10 +719,7 @@ define i64 @sminv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmgt v1.2d, v2.2d, v0.2d
@@ -810,7 +800,6 @@ define i8 @smaxv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxtb w8, w8
@@ -862,7 +851,6 @@ define i8 @smaxv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -927,7 +915,6 @@ define i16 @smaxv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxth w8, w8
@@ -944,7 +931,6 @@ entry:
 define i16 @smaxv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: smaxv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, #-32768 // =0xffff8000
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
 ; CHECK-SD-NEXT:    smaxv h0, v0.4h
@@ -953,7 +939,6 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    smaxv h0, v0.4h
@@ -1071,10 +1056,7 @@ entry:
 define i64 @smaxv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: smaxv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmgt v1.2d, v0.2d, v2.2d
@@ -1087,10 +1069,7 @@ define i64 @smaxv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmgt v1.2d, v0.2d, v2.2d
@@ -1171,7 +1150,6 @@ define i8 @uminv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xff
@@ -1221,7 +1199,6 @@ define i8 @uminv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -1286,7 +1263,6 @@ define i16 @uminv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xffff
@@ -1303,7 +1279,6 @@ entry:
 define i16 @uminv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: uminv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
 ; CHECK-SD-NEXT:    uminv h0, v0.4h
@@ -1312,7 +1287,6 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #65535 // =0xffff
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    uminv h0, v0.4h
@@ -1430,10 +1404,7 @@ entry:
 define i64 @uminv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: uminv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmhi v1.2d, v2.2d, v0.2d
@@ -1446,10 +1417,7 @@ define i64 @uminv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmhi v1.2d, v2.2d, v0.2d
@@ -1530,7 +1498,6 @@ define i8 @umaxv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xff
@@ -1580,7 +1547,6 @@ define i8 @umaxv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -1645,7 +1611,6 @@ define i16 @umaxv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xffff
@@ -1662,7 +1627,6 @@ entry:
 define i16 @umaxv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: umaxv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.h[3], wzr
 ; CHECK-SD-NEXT:    umaxv h0, v0.4h
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -1670,7 +1634,6 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #0 // =0x0
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    umaxv h0, v0.4h
@@ -1787,10 +1750,7 @@ entry:
 define i64 @umaxv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: umaxv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.16b, v2.16b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v3.d[1], xzr
 ; CHECK-SD-NEXT:    cmhi v3.2d, v0.2d, v3.2d
@@ -1804,9 +1764,6 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], xzr
 ; CHECK-GI-NEXT:    cmhi v1.2d, v0.2d, v2.2d

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
index bccb70ccd5c036..37c4ed3607f047 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
@@ -38,7 +38,7 @@ entry:
 define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_1_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #1
+; GISel-O0-NEXT:    mov w8, #1 // =0x1
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -48,7 +48,7 @@ define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_1_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #1
+; GISel-NEXT:    mov w8, #1 // =0x1
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -56,7 +56,7 @@ define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_1_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #1
+; SDAG-NEXT:    mov w8, #1 // =0x1
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -69,7 +69,7 @@ entry:
 define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_10_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10
+; GISel-O0-NEXT:    mov w8, #10 // =0xa
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -79,7 +79,7 @@ define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_10_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10
+; GISel-NEXT:    mov w8, #10 // =0xa
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -87,7 +87,7 @@ define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_10_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10
+; SDAG-NEXT:    mov w8, #10 // =0xa
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -100,7 +100,7 @@ entry:
 define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_10000_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10000
+; GISel-O0-NEXT:    mov w8, #10000 // =0x2710
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -110,7 +110,7 @@ define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_10000_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10000
+; GISel-NEXT:    mov w8, #10000 // =0x2710
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -118,7 +118,7 @@ define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_10000_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10000
+; SDAG-NEXT:    mov w8, #10000 // =0x2710
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -169,7 +169,6 @@ define ptr @memset_tagged_0(ptr %dst, i64 %size, i32 %value) {
 ; GISel-LABEL: memset_tagged_0:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov x8, xzr
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -178,7 +177,6 @@ define ptr @memset_tagged_0(ptr %dst, i64 %size, i32 %value) {
 ; SDAG-LABEL: memset_tagged_0:
 ; SDAG:       // %bb.0: // %entry
 ; SDAG-NEXT:    mov x8, xzr
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -192,7 +190,7 @@ entry:
 define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_1:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #1
+; GISel-O0-NEXT:    mov w8, #1 // =0x1
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -203,8 +201,7 @@ define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_1:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #1
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    mov w8, #1 // =0x1
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -212,8 +209,7 @@ define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_1:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #1
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    mov w8, #1 // =0x1
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -227,7 +223,7 @@ entry:
 define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_10:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10
+; GISel-O0-NEXT:    mov w8, #10 // =0xa
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -238,8 +234,7 @@ define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_10:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    mov w8, #10 // =0xa
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -247,8 +242,7 @@ define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_10:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    mov w8, #10 // =0xa
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -262,7 +256,7 @@ entry:
 define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_10000:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10000
+; GISel-O0-NEXT:    mov w8, #10000 // =0x2710
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -273,8 +267,7 @@ define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_10000:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10000
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
+; GISel-NEXT:    mov w8, #10000 // =0x2710
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -282,8 +275,7 @@ define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_10000:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10000
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
+; SDAG-NEXT:    mov w8, #10000 // =0x2710
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -306,7 +298,6 @@ define ptr @memset_tagged_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_size:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x1!, x2
 ; GISel-NEXT:    setgm [x0]!, x1!, x2
 ; GISel-NEXT:    setge [x0]!, x1!, x2
@@ -314,7 +305,6 @@ define ptr @memset_tagged_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_size:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x1!, x2
 ; SDAG-NEXT:    setgm [x0]!, x1!, x2
 ; SDAG-NEXT:    setge [x0]!, x1!, x2
@@ -337,7 +327,6 @@ define ptr @memset_tagged_size_aligned(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_size_aligned:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x1!, x2
 ; GISel-NEXT:    setgm [x0]!, x1!, x2
 ; GISel-NEXT:    setge [x0]!, x1!, x2
@@ -345,7 +334,6 @@ define ptr @memset_tagged_size_aligned(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_size_aligned:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x1!, x2
 ; SDAG-NEXT:    setgm [x0]!, x1!, x2
 ; SDAG-NEXT:    setge [x0]!, x1!, x2

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index ff7872c922e32f..ed745c12a71963 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -432,7 +432,6 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; GISel-WITHOUT-MOPS-O3-LABEL: memset_10:
 ; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-WITHOUT-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GISel-WITHOUT-MOPS-O3-NEXT:    and x9, x1, #0xff
 ; GISel-WITHOUT-MOPS-O3-NEXT:    mul x8, x9, x8
@@ -454,7 +453,6 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_10:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GISel-MOPS-O3-NEXT:    and x9, x1, #0xff
 ; GISel-MOPS-O3-NEXT:    mul x8, x9, x8
@@ -464,7 +462,6 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -474,7 +471,6 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_10:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -523,7 +519,6 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -531,7 +526,6 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -541,7 +535,6 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_10_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -590,7 +583,6 @@ define void @memset_10000(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10000:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10000 // =0x2710
-; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -609,7 +601,6 @@ define void @memset_10000(ptr %dst, i32 %value) {
 ; SDAG-MOPS-O2-LABEL: memset_10000:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
 ; SDAG-MOPS-O2-NEXT:    mov w8, #10000 // =0x2710
-; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
@@ -656,7 +647,6 @@ define void @memset_10000_volatile(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10000_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10000 // =0x2710
-; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -675,7 +665,6 @@ define void @memset_10000_volatile(ptr %dst, i32 %value) {
 ; SDAG-MOPS-O2-LABEL: memset_10000_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
 ; SDAG-MOPS-O2-NEXT:    mov w8, #10000 // =0x2710
-; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
@@ -724,7 +713,6 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_size:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
@@ -744,7 +732,6 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_size:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2
@@ -793,7 +780,6 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_size_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
@@ -813,7 +799,6 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_size_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index c9864a357186dd..8cf86903a49262 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -88,7 +88,6 @@ define i64 @smull2(i64 %x, i32 %y) {
 ;
 ; CHECK-GI-LABEL: smull2:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -109,7 +108,6 @@ define i64 @smull2_commuted(i64 %x, i32 %y) {
 ;
 ; CHECK-GI-LABEL: smull2_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -125,7 +123,6 @@ entry:
 define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_b:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -133,7 +130,6 @@ define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_b:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -149,7 +145,6 @@ entry:
 define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_b_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w9, w8
@@ -157,7 +152,6 @@ define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_b_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -173,7 +167,6 @@ entry:
 define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -181,7 +174,6 @@ define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -203,7 +195,6 @@ define i64 @smull_ldrsb_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -219,7 +210,6 @@ entry:
 define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_b:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -227,7 +217,6 @@ define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_b:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -243,7 +232,6 @@ entry:
 define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -251,7 +239,6 @@ define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -267,7 +254,6 @@ entry:
 define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w9, w8
@@ -275,7 +261,6 @@ define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -297,7 +282,6 @@ define i64 @smull_ldrsh_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -313,7 +297,6 @@ entry:
 define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_b:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -321,7 +304,6 @@ define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_b:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -337,7 +319,6 @@ entry:
 define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -345,7 +326,6 @@ define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -367,7 +347,6 @@ define i64 @smull_ldrsw_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -389,7 +368,6 @@ define i64 @smull_ldrsw_w_commuted(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -405,8 +383,6 @@ entry:
 define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_sext_bb:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxtb x8, w0
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -414,8 +390,6 @@ define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_sext_bb:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtb x8, w0
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -468,7 +442,6 @@ define i64 @smull_ldrsw_zexth(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zexth:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    smull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -476,7 +449,6 @@ define i64 @smull_ldrsw_zexth(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zexth:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -492,7 +464,6 @@ define i64 @smull_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -500,7 +471,6 @@ define i64 @smull_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -516,7 +486,6 @@ define i64 @smull_ldrsw_zextb_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zextb_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smull x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -524,7 +493,6 @@ define i64 @smull_ldrsw_zextb_commuted(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zextb_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -539,7 +507,6 @@ entry:
 define i64 @smaddl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -547,7 +514,6 @@ define i64 @smaddl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -564,7 +530,6 @@ entry:
 define i64 @smaddl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w9, w8, x2
@@ -572,7 +537,6 @@ define i64 @smaddl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -595,7 +559,6 @@ define i64 @smaddl_ldrsh_w(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -618,7 +581,6 @@ define i64 @smaddl_ldrsh_w_commuted(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -635,7 +597,6 @@ entry:
 define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -643,7 +604,6 @@ define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -659,7 +619,6 @@ define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 define i64 @smaddl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w9, w8, x2
@@ -667,7 +626,6 @@ define i64 @smaddl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -707,8 +665,6 @@ entry:
 define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -716,8 +672,6 @@ define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -757,7 +711,6 @@ define i64 @smaddl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -765,7 +718,6 @@ define i64 @smaddl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: smaddl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -781,7 +733,6 @@ entry:
 define i64 @smnegl_ldrsb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -789,7 +740,6 @@ define i64 @smnegl_ldrsb_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -806,7 +756,6 @@ entry:
 define i64 @smnegl_ldrsb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w9, w8
@@ -814,7 +763,6 @@ define i64 @smnegl_ldrsb_h_commuted(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -837,7 +785,6 @@ define i64 @smnegl_ldrsh_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -860,7 +807,6 @@ define i64 @smnegl_ldrsh_w_commuted(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -877,7 +823,6 @@ entry:
 define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -885,7 +830,6 @@ define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -901,7 +845,6 @@ define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 define i64 @smnegl_ldrsw_b_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w9, w8
@@ -909,7 +852,6 @@ define i64 @smnegl_ldrsw_b_commuted(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -949,8 +891,6 @@ entry:
 define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -958,8 +898,6 @@ define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -999,7 +937,6 @@ define i64 @smnegl_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1007,7 +944,6 @@ define i64 @smnegl_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smnegl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1023,7 +959,6 @@ entry:
 define i64 @smsubl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -1031,7 +966,6 @@ define i64 @smsubl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1048,7 +982,6 @@ entry:
 define i64 @smsubl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w9, w8, x2
@@ -1056,7 +989,6 @@ define i64 @smsubl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1079,7 +1011,6 @@ define i64 @smsubl_ldrsh_w(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1102,7 +1033,6 @@ define i64 @smsubl_ldrsh_w_commuted(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1119,7 +1049,6 @@ entry:
 define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -1127,7 +1056,6 @@ define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1143,7 +1071,6 @@ define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 define i64 @smsubl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w9, w8, x2
@@ -1151,7 +1078,6 @@ define i64 @smsubl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1191,8 +1117,6 @@ entry:
 define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -1200,8 +1124,6 @@ define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1241,7 +1163,6 @@ define i64 @smsubl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1249,7 +1170,6 @@ define i64 @smsubl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: smsubl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1265,7 +1185,6 @@ entry:
 define i64 @smull_sext_ashr31(i32 %a, i64 %b) nounwind {
 ; CHECK-LABEL: smull_sext_ashr31:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    asr x9, x1, #31
 ; CHECK-NEXT:    mul x0, x8, x9
@@ -1286,7 +1205,6 @@ define i64 @smull_sext_ashr32(i32 %a, i64 %b) nounwind {
 ;
 ; CHECK-GI-LABEL: smull_sext_ashr32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    asr x9, x1, #32
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -1338,7 +1256,6 @@ define i64 @umull_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umull_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1346,7 +1263,6 @@ define i64 @umull_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umull_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1362,7 +1278,6 @@ define i64 @umull_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umull_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umull x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -1370,7 +1285,6 @@ define i64 @umull_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umull_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -1407,7 +1321,6 @@ define i64 @umull_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: umull_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1415,7 +1328,6 @@ define i64 @umull_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: umull_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1496,7 +1408,6 @@ define i64 @umaddl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1504,7 +1415,6 @@ define i64 @umaddl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1521,7 +1431,6 @@ define i64 @umaddl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umaddl x0, w9, w8, x2
 ; CHECK-SD-NEXT:    ret
@@ -1529,7 +1438,6 @@ define i64 @umaddl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
 ; CHECK-GI-NEXT:    ret
@@ -1568,7 +1476,6 @@ define i64 @umaddl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1576,7 +1483,6 @@ define i64 @umaddl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1661,7 +1567,6 @@ define i64 @umnegl_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1669,7 +1574,6 @@ define i64 @umnegl_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1686,7 +1590,6 @@ define i64 @umnegl_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umnegl x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -1694,7 +1597,6 @@ define i64 @umnegl_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -1733,7 +1635,6 @@ define i64 @umnegl_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1741,7 +1642,6 @@ define i64 @umnegl_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1826,7 +1726,6 @@ define i64 @umsubl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1834,7 +1733,6 @@ define i64 @umsubl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1851,7 +1749,6 @@ define i64 @umsubl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umsubl x0, w9, w8, x2
 ; CHECK-SD-NEXT:    ret
@@ -1859,7 +1756,6 @@ define i64 @umsubl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
 ; CHECK-GI-NEXT:    ret
@@ -1898,7 +1794,6 @@ define i64 @umsubl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1906,7 +1801,6 @@ define i64 @umsubl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -2119,22 +2013,13 @@ define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
 
 ; Check which can contain multiple copies that should all be removed.
 define i32 @f(i32 %0) {
-; CHECK-SD-LABEL: f:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT:    neg w8, w0
-; CHECK-SD-NEXT:  .LBB93_1: // %B
-; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-SD-NEXT:    cbnz x8, .LBB93_1
-; CHECK-SD-NEXT:    b .LBB93_1
-;
-; CHECK-GI-LABEL: f:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    neg w8, w0
-; CHECK-GI-NEXT:  .LBB93_1: // %B
-; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NEXT:    cbnz x8, .LBB93_1
-; CHECK-GI-NEXT:    b .LBB93_1
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:  .LBB93_1: // %B
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cbnz x8, .LBB93_1
+; CHECK-NEXT:    b .LBB93_1
 entry:
   %1 = sext i32 %0 to i64
   br label %A

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
index e11ae9a2515900..ef7e9f6e0fe434 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -27,7 +27,6 @@ declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>)
 define i8 @mulv_v2i8(<2 x i8> %a) {
 ; CHECK-LABEL: mulv_v2i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -51,7 +50,6 @@ entry:
 define i8 @mulv_v4i8(<4 x i8> %a) {
 ; CHECK-SD-LABEL: mulv_v4i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -63,7 +61,6 @@ define i8 @mulv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -80,7 +77,6 @@ entry:
 define i8 @mulv_v8i8(<8 x i8> %a) {
 ; CHECK-SD-LABEL: mulv_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[1]
 ; CHECK-SD-NEXT:    umov w9, v0.b[0]
 ; CHECK-SD-NEXT:    umov w10, v0.b[2]
@@ -100,7 +96,6 @@ define i8 @mulv_v8i8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v8i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.b[0]
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    umov w10, v0.b[2]
@@ -223,7 +218,6 @@ entry:
 define i16 @mulv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: mulv_v2i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -236,7 +230,6 @@ entry:
 define i16 @mulv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: mulv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -246,7 +239,6 @@ define i16 @mulv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -261,7 +253,6 @@ entry:
 define i16 @mulv_v4i16(<4 x i16> %a) {
 ; CHECK-SD-LABEL: mulv_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -273,7 +264,6 @@ define i16 @mulv_v4i16(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -356,7 +346,6 @@ entry:
 define i32 @mulv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: mulv_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -448,11 +437,8 @@ entry:
 define i64 @mulv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: mulv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d2
 ; CHECK-SD-NEXT:    fmov x9, d0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mul x8, x9, x8
 ; CHECK-SD-NEXT:    fmov x9, d1
 ; CHECK-SD-NEXT:    mul x0, x9, x8

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 2e165179381820..1ba16956cad450 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    add x8, x0, #8
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    st1.s { v1 }[2], [x8]
-; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    st1.s { v0 }[2], [x8]
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    mov.h v2[0], v1[0]
 ; CHECK-NEXT:    bic.4h v2, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v2, v2, #0
-; CHECK-NEXT:    ucvtf.4s v2, v2
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v2, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -490,7 +490,6 @@ define void @store_saddlv_v8i8(ptr %H, <8 x i8> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v8i8:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.8b h0, v0
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -506,7 +505,6 @@ define void @store_saddlv_v16i8(ptr %H, <16 x i8> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v16i8:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.16b h0, v0
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -522,7 +520,6 @@ define void @store_saddlv_v4i16(ptr %H, <4 x i16> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v4i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.4h s0, v0
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -538,7 +535,6 @@ define void @store_saddlv_v8i16(ptr %H, <8 x i16> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v8i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.8h s0, v0
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
index 30ce0cb09fc084..75d7623c4cfb2e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
@@ -21,7 +21,6 @@ define i32 @narrow_load_v4_i32_single_ele_variable_idx(ptr %ptr, i64 %off, i32 %
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ldr q0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    bfi x8, x2, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr w0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
index 7f20b5e5ee4df5..75a96be9b435e2 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
@@ -24,8 +24,6 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1
 define void @test_wsr128(i128 noundef %v) #0 {
 ; CHECK-LE-LABEL: test_wsr128:
 ; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
-; CHECK-LE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; CHECK-LE-NEXT:    msrr S1_2_C3_C4_5, x0, x1
 ; CHECK-LE-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
index 4cd7c4a034c875..0dc46b5c9eaafd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -9,7 +9,6 @@ define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    dup v0.4h, v0.h[4]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext

diff  --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index d501d9ed24547a..fed183148d06ee 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -248,7 +248,6 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
 ; CHECK-GI-NEXT:    cmp w8, #0
 ; CHECK-GI-NEXT:    cneg w8, w9, le
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)

diff  --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 025bbf749fc71b..61de2dc4edfd16 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -129,9 +129,7 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv2i1_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.d, #0, #1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    and x8, x1, #0xff
@@ -344,7 +342,6 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
   ret <16 x i1> %active.lane.mask
@@ -355,7 +352,6 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
   ret <8 x i1> %active.lane.mask
@@ -366,7 +362,6 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, w0, w1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
   ret <4 x i1> %active.lane.mask
@@ -377,7 +372,6 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, w0, w1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
   ret <2 x i1> %active.lane.mask
@@ -388,7 +382,6 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
   ret <16 x i1> %active.lane.mask
@@ -399,7 +392,6 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
   ret <8 x i1> %active.lane.mask
@@ -410,7 +402,6 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, x0, x1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
   ret <4 x i1> %active.lane.mask
@@ -421,7 +412,6 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, x0, x1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
   ret <2 x i1> %active.lane.mask

diff  --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll
index 4b1393ffc624aa..d6ea5cf34529d2 100644
--- a/llvm/test/CodeGen/AArch64/adc.ll
+++ b/llvm/test/CodeGen/AArch64/adc.ll
@@ -69,7 +69,6 @@ define i128 @test_shifted(i128 %a, i128 %b) {
 define i128 @test_extended(i128 %a, i16 %b) {
 ; CHECK-LE-LABEL: test_extended:
 ; CHECK-LE:       ; %bb.0:
-; CHECK-LE-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-LE-NEXT:    sxth x8, w2
 ; CHECK-LE-NEXT:    adds x0, x0, w2, sxth #3
 ; CHECK-LE-NEXT:    asr x9, x8, #63
@@ -79,7 +78,6 @@ define i128 @test_extended(i128 %a, i16 %b) {
 ;
 ; CHECK-BE-LABEL: test_extended:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-BE-NEXT:    sxth x8, w2
 ; CHECK-BE-NEXT:    adds x1, x1, w2, sxth #3
 ; CHECK-BE-NEXT:    asr x9, x8, #63

diff  --git a/llvm/test/CodeGen/AArch64/add-extract.ll b/llvm/test/CodeGen/AArch64/add-extract.ll
index 67c9f74ee02988..31c1120a6d0150 100644
--- a/llvm/test/CodeGen/AArch64/add-extract.ll
+++ b/llvm/test/CodeGen/AArch64/add-extract.ll
@@ -69,7 +69,6 @@ define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
 define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
 ; CHECK-LABEL: add_i32_ext_load:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    add w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index e3072dc41d933c..dbbacf1f55a71d 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -399,19 +399,14 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    add v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    add x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    add v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = add <3 x i64> %d, %e

diff  --git a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
index 6636813eb25049..312fd348fddce3 100644
--- a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -318,7 +318,6 @@ define i32 @addmuladd_gep2(ptr %p, i32 %a) {
 ; CHECK-LABEL: addmuladd_gep2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3240 // =0xca8
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    smaddl x8, w1, w8, x0
 ; CHECK-NEXT:    ldr w8, [x8, #3260]
 ; CHECK-NEXT:    tbnz w8, #31, .LBB22_2

diff  --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index 54c96820285d32..526943eaf88ad2 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -28,7 +28,6 @@ define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
   %r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -51,7 +50,6 @@ define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.8h, v0.8h, v0.8h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -74,7 +72,6 @@ define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.16b, v0.16b, v0.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

diff  --git a/llvm/test/CodeGen/AArch64/addsub_ext.ll b/llvm/test/CodeGen/AArch64/addsub_ext.ll
index 04a98bd5088803..6c3564375279e9 100644
--- a/llvm/test/CodeGen/AArch64/addsub_ext.ll
+++ b/llvm/test/CodeGen/AArch64/addsub_ext.ll
@@ -26,7 +26,6 @@ define i32 @add_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -37,7 +36,6 @@ define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -70,7 +68,6 @@ define i32 @add_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -81,7 +78,6 @@ define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -114,7 +110,6 @@ define i32 @add_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -125,7 +120,6 @@ define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -179,7 +173,6 @@ define i32 @add_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -190,7 +183,6 @@ define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -244,7 +236,6 @@ define i32 @sub_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -255,7 +246,6 @@ define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -288,7 +278,6 @@ define i32 @sub_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -299,7 +288,6 @@ define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -332,7 +320,6 @@ define i32 @sub_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -343,7 +330,6 @@ define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -397,7 +383,6 @@ define i32 @sub_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -408,7 +393,6 @@ define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -444,7 +428,7 @@ define i32 @cmp_s_i8i32(i8 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxtb
 ; CHECK-NEXT:    b.ge .LBB40_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB40_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -461,11 +445,10 @@ end:
 define i64 @cmp_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxtb
 ; CHECK-NEXT:    b.ge .LBB41_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB41_2: // %end
 ; CHECK-NEXT:    mov x0, x1
@@ -485,7 +468,7 @@ define i32 @cmp_s_i16i32(i16 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxth
 ; CHECK-NEXT:    b.ge .LBB42_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -502,11 +485,10 @@ end:
 define i64 @cmp_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxth
 ; CHECK-NEXT:    b.ge .LBB43_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB43_2: // %end
 ; CHECK-NEXT:    mov x0, x1

diff  --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 09f00b3845f25f..5046c0571ad2bc 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -11,7 +11,6 @@ define void @new_position(i32 %pos) {
 ; CHECK-SD-LABEL: new_position:
 ; CHECK-SD:       ; %bb.0: ; %entry
 ; CHECK-SD-NEXT:    adrp x8, _board at GOTPAGE
-; CHECK-SD-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr x8, [x8, _board at GOTPAGEOFF]
 ; CHECK-SD-NEXT:    ldrb w8, [x8, w0, sxtw]
 ; CHECK-SD-NEXT:    sub w8, w8, #1

diff  --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index 5c7429aebb31e9..e99a2754ef0857 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -1200,19 +1200,14 @@ define <3 x i64> @and_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: and_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    and x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = and <3 x i64> %d, %e
@@ -1229,19 +1224,14 @@ define <3 x i64> @or_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: or_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = or <3 x i64> %d, %e
@@ -1258,19 +1248,14 @@ define <3 x i64> @xor_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: xor_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    eor x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = xor <3 x i64> %d, %e

diff  --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 1b22514a59d608..672002a03d58dc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -173,7 +173,6 @@ define void @bar(i32 %x, <4 x i32> %y) nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str w0, [sp, #60]
 ; CHECK-NEXT:    stp q0, q0, [sp, #16]
 ; CHECK-NEXT:    str x0, [sp]
@@ -241,7 +240,6 @@ define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str w0, [sp, #60]
 ; CHECK-NEXT:    stp x1, x2, [sp, #32]
 ; CHECK-NEXT:    stp x1, x2, [sp, #16]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-addp.ll b/llvm/test/CodeGen/AArch64/arm64-addp.ll
index 47ffba9a509ac9..28a90271dbc368 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addp.ll
@@ -27,7 +27,6 @@ define i64 @foo0(<2 x i64> %a) nounwind {
 define float @foo1(<2 x float> %a) nounwind {
 ; CHECK-LABEL: foo1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp.2s s0, v0
 ; CHECK-NEXT:    ret
   %lane0.i = extractelement <2 x float> %a, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index 65cc368c0b5617..f8df92cd7e74d8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -11,8 +11,6 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
 ; CHECK-LABEL: fullGtU:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    adrp x9, _block at GOTPAGE
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    sxtw x10, w1
 ; CHECK-NEXT:    ldr x9, [x9, _block at GOTPAGEOFF]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 37c61d0a4a0fb6..4a84c673af8cfc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -43,10 +43,6 @@ define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -94,10 +90,6 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap_seqcst:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -145,10 +137,6 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap_release:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspl x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -196,10 +184,6 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
 ;
 ; LSE-LABEL: val_compare_and_swap_monotonic:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -251,7 +235,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) {
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
-; LSE-NEXT:    and x8, x7, x3
+; LSE-NEXT:    and x8, x5, x3
 ; LSE-NEXT:    and x9, x4, x2
 ; LSE-NEXT:    mvn x10, x9
 ; LSE-NEXT:    mvn x11, x8
@@ -311,7 +295,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    orr x8, x4, x2
-; LSE-NEXT:    orr x9, x7, x3
+; LSE-NEXT:    orr x9, x5, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -368,7 +352,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    adds x8, x4, x2
-; LSE-NEXT:    adc x9, x7, x3
+; LSE-NEXT:    adc x9, x5, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -424,7 +408,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    subs x8, x4, x2
-; LSE-NEXT:    sbc x9, x7, x3
+; LSE-NEXT:    sbc x9, x5, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -484,8 +468,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x7
-; LSE-NEXT:    csel x9, x7, x3, ge
+; LSE-NEXT:    sbcs xzr, x3, x5
+; LSE-NEXT:    csel x9, x5, x3, ge
 ; LSE-NEXT:    csel x8, x4, x2, ge
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -546,8 +530,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x7
-; LSE-NEXT:    csel x9, x7, x3, lt
+; LSE-NEXT:    sbcs xzr, x3, x5
+; LSE-NEXT:    csel x9, x5, x3, lt
 ; LSE-NEXT:    csel x8, x4, x2, lt
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -608,8 +592,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x7
-; LSE-NEXT:    csel x9, x7, x3, hs
+; LSE-NEXT:    sbcs xzr, x3, x5
+; LSE-NEXT:    csel x9, x5, x3, hs
 ; LSE-NEXT:    csel x8, x4, x2, hs
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -670,8 +654,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x7
-; LSE-NEXT:    csel x9, x7, x3, lo
+; LSE-NEXT:    sbcs xzr, x3, x5
+; LSE-NEXT:    csel x9, x5, x3, lo
 ; LSE-NEXT:    csel x8, x4, x2, lo
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -714,8 +698,8 @@ define i128 @atomic_load_seq_cst(ptr %p) {
 ;
 ; LSE-LABEL: atomic_load_seq_cst:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    mov x2, #0
-; LSE-NEXT:    mov x3, #0
+; LSE-NEXT:    mov x2, #0 // =0x0
+; LSE-NEXT:    mov x3, #0 // =0x0
 ; LSE-NEXT:    caspal x2, x3, x2, x3, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -747,8 +731,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) {
 ;
 ; LSE-LABEL: atomic_load_relaxed:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    mov x0, #0
-; LSE-NEXT:    mov x1, #0
+; LSE-NEXT:    mov x0, #0 // =0x0
+; LSE-NEXT:    mov x1, #0 // =0x0
 ; LSE-NEXT:    casp x0, x1, x0, x1, [x2]
 ; LSE-NEXT:    ret
     %r = load atomic i128, ptr %p monotonic, align 16
@@ -779,9 +763,7 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_seq_cst:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
-; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB14_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -821,9 +803,7 @@ define void @atomic_store_release(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_release:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
-; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB15_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -863,9 +843,7 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_relaxed:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
-; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB16_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -921,10 +899,6 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) {
 ;
 ; LSE-LABEL: cmpxchg_dead:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
-; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
-; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    ret
   cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic

diff  --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index bdbff0563a22b9..fb17bdac1fe78d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -771,7 +771,6 @@ entry:
 define i64 @fct18(i32 %xor72) nounwind ssp {
 ; LLC-LABEL: fct18:
 ; LLC:       // %bb.0:
-; LLC-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; LLC-NEXT:    ubfx x0, x0, #9, #8
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct18(

diff  --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 82802c79c70858..3dd384ab2aa9ab 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -6,10 +6,6 @@
 define <4 x float>  @foo(float %a, float %b, float %c, float %d) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-NEXT:    mov v0.s[3], v3.s[0]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
index 1b9db8b70a1fb4..2283cd9e771651 100644
--- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -612,7 +612,6 @@ define <1 x i8> @getL() {
 ; CHECK-LABEL: _setL
 ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
 ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L at GOTPAGE
-; CHECK-NEXT: ; kill
 ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
 ; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _L at GOTPAGEOFF]
 ; Ultimately we should generate str b0, but right now, we match the vector

diff  --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 4c28ea75922024..66356a45b2e514 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -43,7 +43,6 @@ define <2 x i32> @v_dup32(i32 %A) nounwind {
 define <2 x float> @v_dupfloat(float %A) nounwind {
 ; CHECK-LABEL: v_dupfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
@@ -119,7 +118,6 @@ define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
 define <4 x float> @v_dupQfloat(float %A) nounwind {
 ; CHECK-LABEL: v_dupQfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
@@ -164,7 +162,6 @@ define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
 ; CHECK-LABEL: v_shuffledupfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <2 x float> undef, float %A, i32 0
@@ -205,7 +202,6 @@ define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 ; CHECK-LABEL: v_shuffledupQfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <4 x float> undef, float %A, i32 0
@@ -216,7 +212,6 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
 ; CHECK-LABEL: vduplane8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8b v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -226,7 +221,6 @@ define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
 define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
 ; CHECK-LABEL: vduplane16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4h v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -236,7 +230,6 @@ define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
 define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: vduplane32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
@@ -246,7 +239,6 @@ define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
 define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
 ; CHECK-LABEL: vduplanefloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
@@ -256,7 +248,6 @@ define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
 define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.16b v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -266,7 +257,6 @@ define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -276,7 +266,6 @@ define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -286,7 +275,6 @@ define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 define <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
 ; CHECK-LABEL: vduplaneQfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -338,14 +326,12 @@ define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone  {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov.s v0[1], w1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: f:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov.s v0[0], w0
 ; CHECK-GI-NEXT:    mov.s v0[1], w1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
   %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
@@ -409,7 +395,6 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov.s w8, v0[3]
 ; CHECK-GI-NEXT:    mov.h v0[3], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %val = extractelement <4 x i32> %in, i32 3
   %smallval = trunc i32 %val to i16
@@ -445,20 +430,15 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
 ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
 ; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x i16> %r
@@ -468,20 +448,15 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
 ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
 ; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x half> %r
@@ -497,9 +472,7 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI36_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -516,9 +489,7 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI37_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -538,14 +509,13 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
 ; CHECK-GI-LABEL: disguised_dup:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI38_1
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_1]
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
-; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT:    tbl.16b v2, { v0, v1 }, v2
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    tbl.16b v1, { v0, v1 }, v1
 ; CHECK-GI-NEXT:    str q0, [x0]
-; CHECK-GI-NEXT:    str q2, [x1]
+; CHECK-GI-NEXT:    str q1, [x1]
 ; CHECK-GI-NEXT:    ret
   %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 0>
   %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
@@ -587,7 +557,6 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI40_0
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI40_0]
 ; CHECK-GI-NEXT:    add.4s v0, v0, v1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = add <4 x i32> %A, <i32 8421377, i32 8421377, i32 8421377, i32 8421377>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -598,7 +567,6 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
 ; CHECK-SD-LABEL: dup_const24:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movk w8, #128, lsl #16
 ; CHECK-SD-NEXT:    dup.4s v3, w8
 ; CHECK-SD-NEXT:    add.2s v0, v0, v3
@@ -610,7 +578,6 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
 ; CHECK-GI-LABEL: dup_const24:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI41_1
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    ldr d3, [x8, :lo12:.LCPI41_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI41_0
 ; CHECK-GI-NEXT:    add.2s v0, v0, v3
@@ -656,7 +623,6 @@ define <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
 define <8 x i16> @bitcast_f64_v8i16(double %a) {
 ; CHECK-LABEL: bitcast_f64_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[0]
 ; CHECK-NEXT:    ret
   %b = bitcast double %a to <4 x i16>

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll
index 50df6a0388587b..6a6901790e5c35 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll
@@ -108,11 +108,8 @@ define <4 x i16> @test_undef(<8 x i16> %tmp1, <8 x i16> %tmp2) {
 ; CHECK-GI-LABEL: test_undef:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
   ret <4 x i16> %tmp3

diff  --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 7a4cdd52db904a..987e09fc19dd10 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -8,8 +8,6 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    umov w9, v0.b[1]
 ; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #3
@@ -28,7 +26,6 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
 ; CHECK-GISEL-NEXT:    sub sp, sp, #16
 ; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-GISEL-NEXT:    mov w9, w0
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    mov b1, v0.b[1]
 ; CHECK-GISEL-NEXT:    add x8, sp, #8
 ; CHECK-GISEL-NEXT:    and x9, x9, #0x7
@@ -65,7 +62,6 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #4
 ; CHECK-SDAG-NEXT:    ldr b1, [x8]
@@ -134,7 +130,6 @@ define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SDAG-NEXT:    ldr w0, [x8]
@@ -145,7 +140,6 @@ define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
 ; CHECK-GISEL:       // %bb.0:
 ; CHECK-GISEL-NEXT:    sub sp, sp, #16
 ; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    mov s1, v0.s[1]
 ; CHECK-GISEL-NEXT:    mov w9, w0
 ; CHECK-GISEL-NEXT:    add x8, sp, #12
@@ -165,14 +159,11 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    umov w9, v0.h[1]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SDAG-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-SDAG-NEXT:    mov v0.s[1], w9
-; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SDAG-NEXT:    add sp, sp, #16
 ; CHECK-SDAG-NEXT:    ret
 ;
@@ -184,13 +175,11 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
 ; CHECK-GISEL-NEXT:    mov w8, #2 // =0x2
 ; CHECK-GISEL-NEXT:    add x10, sp, #8
 ; CHECK-GISEL-NEXT:    and x9, x9, #0x3
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    str d0, [sp, #8]
 ; CHECK-GISEL-NEXT:    madd x8, x9, x8, x10
 ; CHECK-GISEL-NEXT:    umov w9, v0.h[1]
 ; CHECK-GISEL-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-GISEL-NEXT:    mov v0.s[1], w9
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    add sp, sp, #16
 ; CHECK-GISEL-NEXT:    ret
   %tmp = extractelement <4 x i16> %x, i32 %idx
@@ -206,7 +195,6 @@ define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SDAG-NEXT:    ldr h1, [x8]
@@ -249,7 +237,6 @@ define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SDAG-NEXT:    ldr w0, [x8]
@@ -277,7 +264,6 @@ define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SDAG-NEXT:    ldr s1, [x8]
@@ -312,7 +298,6 @@ define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SDAG-NEXT:    ldr x0, [x8]
@@ -340,7 +325,6 @@ define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SDAG-NEXT:    ldr x0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
index f08ff2ccf693d9..df6caad99066f6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
@@ -8,7 +8,6 @@ define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
 ; CHECK-SD-LABEL: v8i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v8i8:
@@ -23,7 +22,6 @@ define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
 ; CHECK-SD-LABEL: v4i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v4i16:
@@ -38,7 +36,6 @@ define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
 ; CHECK-SD-LABEL: v2i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v2i32:
@@ -53,7 +50,6 @@ define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
 ; CHECK-SD-LABEL: v1i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1i64:
@@ -68,7 +64,6 @@ define <1 x ptr> @v1p0(<2 x ptr> %a) nounwind {
 ; CHECK-SD-LABEL: v1p0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1p0:
@@ -83,7 +78,6 @@ define <2 x float> @v2f32(<4 x float> %a) nounwind {
 ; CHECK-SD-LABEL: v2f32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v2f32:
@@ -98,7 +92,6 @@ define <1 x double> @v1f64(<2 x double> %a) nounwind {
 ; CHECK-SD-LABEL: v1f64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1f64:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
index d5324e4274725b..74d117852fd7cd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -7,10 +7,7 @@ define float @test1(float %x, float %y) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
-; CHECK-NEXT:    ; kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    ; kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    bif.16b v0, v1, v2
-; CHECK-NEXT:    ; kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
@@ -21,11 +18,8 @@ define double @test2(double %x, double %y) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call double @copysign(double %x, double %y) nounwind readnone
@@ -38,11 +32,9 @@ define double @test3(double %a, float %b, float %c) nounwind {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v3, #0xffffffffffffffff
 ; CHECK-NEXT:    fadd s1, s1, s2
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fneg.2d v2, v3
 ; CHECK-NEXT:    fcvt d1, s1
 ; CHECK-NEXT:    bif.16b v0, v1, v2
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp1 = fadd float %b, %c
   %tmp2 = fpext float %tmp1 to double

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index b580c4921fb66a..00e30a5c9f1718 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -25,7 +25,6 @@ define double @bar(ptr %iVals, ptr %fVals, ptr %dVals) {
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    sri d0, d0, #1
 ; CHECK-NEXT:    scvtf.2d v0, v0, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %arrayidx = getelementptr inbounds double, ptr %dVals, i64 16

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index d7d54a6e48a92d..8ae1e0768db3db 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -71,7 +71,6 @@ define i64 @test_integer(i64  %in) {
 define float @test_f16(half %in) {
 ; CHECK-LABEL: test_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fcvt s1, h0
 ; CHECK-NEXT:    movi d2, #0000000000000000
 ; CHECK-NEXT:    fcmp s1, #0.0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 7eb26096ed1566..a3d48268957cc8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -602,7 +602,6 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -621,7 +620,6 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
   %val32 = fptosi <2 x fp128> %val to <2 x i32>
@@ -686,7 +684,6 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -705,7 +702,6 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
   %val32 = fptoui <2 x fp128> %val to <2 x i32>
@@ -761,7 +757,6 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatsitf
@@ -783,7 +778,6 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatsitf
@@ -851,7 +845,6 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunsitf
@@ -873,7 +866,6 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatunsitf
@@ -979,10 +971,9 @@ define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    bl __letf2
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    cset w8, le
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cset w8, le
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ole <2 x fp128> %lhs, %rhs
@@ -1034,10 +1025,9 @@ define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    bl __letf2
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    cset w8, gt
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    cset w8, gt
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ugt <2 x fp128> %lhs, %rhs
@@ -1116,7 +1106,6 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    orr w8, w20, w8
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ueq <2 x fp128> %lhs, %rhs
@@ -1126,7 +1115,6 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-SD-LABEL: vec_select:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s0
 ; CHECK-SD-NEXT:    tst w8, #0x1
 ; CHECK-SD-NEXT:    b.eq .LBB40_2
@@ -1145,7 +1133,6 @@ define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rh
 ;
 ; CHECK-GI-LABEL: vec_select:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w9, s0
 ; CHECK-GI-NEXT:    mov d5, v1.d[1]
@@ -1183,15 +1170,12 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfhf2
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfhf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1206,11 +1190,9 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfhf2
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
@@ -1219,7 +1201,6 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
   %dst = fptrunc <2 x fp128> %val to <2 x half>
@@ -1236,15 +1217,12 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfsf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfsf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1256,12 +1234,10 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfsf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfsf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    fmov d0, d1
@@ -1281,12 +1257,10 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfdf2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfdf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1300,12 +1274,10 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfdf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfdf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
@@ -1322,9 +1294,7 @@ define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendhftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -1345,9 +1315,7 @@ define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendhftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -1369,9 +1337,7 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -1392,9 +1358,7 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendsftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -1417,7 +1381,6 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1438,7 +1401,6 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8

diff  --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 0412aef7545e9d..9b799da165fc3e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -821,13 +821,11 @@ define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) {
 define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b0, v0[3]
 ; CHECK-GI-NEXT:    str b0, [x0], #1
 ; CHECK-GI-NEXT:    ret
@@ -842,13 +840,11 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b0, v0[3]
 ; CHECK-GI-NEXT:    str b0, [x0], #2
 ; CHECK-GI-NEXT:    ret
@@ -862,13 +858,11 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
 define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h0, v0[3]
 ; CHECK-GI-NEXT:    str h0, [x0], #2
 ; CHECK-GI-NEXT:    ret
@@ -883,13 +877,11 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h0, v0[3]
 ; CHECK-GI-NEXT:    str h0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -903,13 +895,11 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
 define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -924,13 +914,11 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #8
 ; CHECK-GI-NEXT:    ret
@@ -944,13 +932,11 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
 define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -965,13 +951,11 @@ define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #8
 ; CHECK-GI-NEXT:    ret
@@ -5490,18 +5474,14 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5513,18 +5493,14 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5539,18 +5515,14 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>,
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5562,18 +5534,14 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5588,18 +5556,14 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5611,19 +5575,15 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5638,18 +5598,14 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>,
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5661,19 +5617,15 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5688,18 +5640,14 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>,
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5711,19 +5659,15 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5738,18 +5682,14 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>,
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5761,19 +5701,15 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5788,18 +5724,14 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>,
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5811,19 +5743,15 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5838,18 +5766,14 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>,
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5861,19 +5785,15 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5888,18 +5808,14 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>,
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5911,19 +5827,15 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5938,18 +5850,14 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5961,19 +5869,15 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5988,18 +5892,14 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -6011,19 +5911,15 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -6038,18 +5934,14 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -6061,19 +5953,15 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -6088,20 +5976,14 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -6113,20 +5995,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A,
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -6141,20 +6017,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6166,20 +6036,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6194,20 +6058,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #6
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #6
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6219,21 +6077,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A,
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6248,20 +6100,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #6
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #6
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6273,21 +6119,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A,
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6302,20 +6142,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #12
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6327,21 +6161,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A,
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6356,20 +6184,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #12
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6381,21 +6203,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A,
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6410,20 +6226,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #24
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6435,21 +6245,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A,
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6464,20 +6268,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #24
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6489,21 +6287,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A,
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6518,20 +6310,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #12
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6543,21 +6329,15 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6572,20 +6352,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #12
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6597,21 +6371,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6626,20 +6394,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #24
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6651,21 +6413,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6680,20 +6436,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #24
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6705,21 +6455,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6734,22 +6478,14 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6761,22 +6497,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6791,22 +6519,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6818,22 +6538,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6848,22 +6560,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6875,23 +6579,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6906,22 +6602,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #8
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6933,23 +6621,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6964,22 +6644,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -6991,23 +6663,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -7022,22 +6686,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -7049,23 +6705,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -7080,22 +6728,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #32
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -7107,23 +6747,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -7138,22 +6770,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #32
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -7165,23 +6789,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -7196,22 +6812,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -7223,23 +6831,15 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -7254,22 +6854,14 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #16
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -7281,23 +6873,15 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -7312,22 +6896,14 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #32
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -7339,23 +6915,15 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -7370,22 +6938,14 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, #32
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -7397,23 +6957,15 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -7428,17 +6980,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -7449,17 +6997,13 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -7473,17 +7017,13 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
 define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7494,17 +7034,13 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n
 define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7518,17 +7054,13 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7540,8 +7072,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7549,8 +7079,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7564,17 +7092,13 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
 define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7586,8 +7110,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7595,8 +7117,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7610,17 +7130,13 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
 define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7632,8 +7148,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7641,8 +7155,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7656,17 +7168,13 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
 define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7678,8 +7186,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7687,8 +7193,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7702,17 +7206,13 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
 define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7724,8 +7224,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7733,8 +7231,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7748,17 +7244,13 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
 define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7770,8 +7262,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7779,8 +7269,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7794,17 +7282,13 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
 define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7816,8 +7300,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7825,8 +7307,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7840,17 +7320,13 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr)
 define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7862,8 +7338,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7871,8 +7345,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7886,17 +7358,13 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr)
 define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7908,8 +7376,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7917,8 +7383,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7932,17 +7396,13 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr)
 define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7954,8 +7414,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7963,8 +7421,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7978,19 +7434,13 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr)
 define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8001,19 +7451,13 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8027,19 +7471,13 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt
 define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8050,19 +7488,13 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
 define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8076,19 +7508,13 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8099,20 +7525,14 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8126,19 +7546,13 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt
 define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8149,20 +7563,14 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8176,19 +7584,13 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt
 define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -8199,20 +7601,14 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -8226,19 +7622,13 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt
 define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -8249,20 +7639,14 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -8276,19 +7660,13 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt
 define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -8299,20 +7677,14 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -8326,19 +7698,13 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt
 define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -8349,20 +7715,14 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -8376,19 +7736,13 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt
 define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -8399,20 +7753,14 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -8426,19 +7774,13 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
 define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -8449,20 +7791,14 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -8476,19 +7812,13 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
 define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -8499,20 +7829,14 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -8526,19 +7850,13 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do
 define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -8549,20 +7867,14 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -8576,21 +7888,13 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do
 define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -8601,21 +7905,13 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -8629,21 +7925,13 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1
 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -8654,21 +7942,13 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
 define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -8682,21 +7962,13 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i
 define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -8707,22 +7979,14 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -8736,21 +8000,13 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8
 define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8761,22 +8017,14 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8790,21 +8038,13 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4
 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8815,22 +8055,14 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8844,21 +8076,13 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4
 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8869,22 +8093,14 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8898,21 +8114,13 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2
 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8923,22 +8131,14 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8952,21 +8152,13 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2
 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -8977,22 +8169,14 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -9006,21 +8190,13 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1
 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9031,22 +8207,14 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9060,21 +8228,13 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
 define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9085,22 +8245,14 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9114,21 +8266,13 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
 define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9139,22 +8283,14 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9168,21 +8304,13 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do
 define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9193,22 +8321,14 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9222,17 +8342,13 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do
 define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -9243,17 +8359,13 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -9267,17 +8379,13 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
 define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -9288,17 +8396,13 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C)
 define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -9312,17 +8416,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -9334,8 +8434,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9343,8 +8441,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -9358,17 +8454,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
 define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -9380,8 +8472,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9389,8 +8479,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -9404,17 +8492,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
 define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -9426,8 +8510,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9435,8 +8517,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -9450,17 +8530,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
 define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -9472,8 +8548,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9481,8 +8555,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -9496,17 +8568,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
 define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -9518,8 +8586,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9527,8 +8593,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -9542,17 +8606,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
 define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -9564,8 +8624,6 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9573,8 +8631,6 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -9588,17 +8644,13 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
 define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -9610,8 +8662,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9619,8 +8669,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -9634,17 +8682,13 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr)
 define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -9656,8 +8700,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9665,8 +8707,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -9680,17 +8720,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr)
 define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -9702,8 +8738,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9711,8 +8745,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -9726,17 +8758,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr)
 define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -9748,8 +8776,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9757,8 +8783,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -9772,19 +8796,13 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr)
 define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -9795,19 +8813,13 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -9821,19 +8833,13 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
 define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -9844,19 +8850,13 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
 define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -9870,19 +8870,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -9893,20 +8887,14 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -9920,19 +8908,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
 define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -9943,20 +8925,14 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -9970,19 +8946,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,
 define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -9993,20 +8963,14 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -10020,19 +8984,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,
 define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -10043,20 +9001,14 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -10070,19 +9022,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
 define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -10093,20 +9039,14 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -10120,19 +9060,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,
 define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -10143,20 +9077,14 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -10170,19 +9098,13 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,
 define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -10193,20 +9115,14 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -10220,19 +9136,13 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
 define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -10243,20 +9153,14 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -10270,19 +9174,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
 define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -10293,20 +9191,14 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -10320,19 +9212,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x
 define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -10343,20 +9229,14 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -10370,21 +9250,13 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x
 define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -10395,21 +9267,13 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -10423,21 +9287,13 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
 define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -10448,21 +9304,13 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
 define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -10476,21 +9324,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x
 define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -10501,22 +9341,14 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -10530,21 +9362,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
 define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -10555,22 +9379,14 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -10584,21 +9400,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<
 define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -10609,22 +9417,14 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -10638,21 +9438,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<
 define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -10663,22 +9455,14 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -10692,21 +9476,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
 define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -10717,22 +9493,14 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -10746,21 +9514,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<
 define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -10771,22 +9531,14 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -10800,21 +9552,13 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<
 define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -10825,22 +9569,14 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -10854,21 +9590,13 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
 define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -10879,22 +9607,14 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -10908,21 +9628,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
 define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -10933,22 +9645,14 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -10962,21 +9666,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x
 define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -10987,22 +9683,14 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -11015,17 +9703,13 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x
 define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -11036,17 +9720,13 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -11060,17 +9740,13 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr)
 define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -11081,17 +9757,13 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -11105,17 +9777,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr)
 define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -11127,8 +9795,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11136,8 +9802,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -11151,17 +9815,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr)
 define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -11173,8 +9833,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11182,8 +9840,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -11197,17 +9853,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr)
 define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -11219,8 +9871,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11228,8 +9878,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -11243,17 +9891,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
 define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -11265,8 +9909,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11274,8 +9916,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -11289,17 +9929,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr)
 define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -11311,8 +9947,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11320,8 +9954,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -11335,17 +9967,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
 define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -11357,8 +9985,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11366,8 +9992,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -11381,17 +10005,13 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr)
 define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -11403,8 +10023,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11412,8 +10030,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -11427,17 +10043,13 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64,
 define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -11449,8 +10061,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11458,8 +10068,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -11473,17 +10081,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64,
 define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -11495,8 +10099,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11504,8 +10106,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -11519,17 +10119,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64
 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -11541,8 +10137,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -11550,8 +10144,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -11565,19 +10157,13 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64
 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -11588,19 +10174,13 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -11614,19 +10194,13 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
 define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -11637,19 +10211,13 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -11663,19 +10231,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6
 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #6
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -11686,20 +10248,14 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -11713,19 +10269,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
 define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #6
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -11736,20 +10286,14 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -11763,19 +10307,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
 define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -11786,20 +10324,14 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -11813,19 +10345,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
 define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -11836,20 +10362,14 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -11863,19 +10383,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
 define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -11886,20 +10400,14 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -11913,19 +10421,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
 define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -11936,20 +10438,14 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -11963,19 +10459,13 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
 define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -11986,20 +10476,14 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -12013,19 +10497,13 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
 define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -12036,20 +10514,14 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -12063,19 +10535,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
 define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -12086,20 +10552,14 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -12113,19 +10573,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2
 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -12136,20 +10590,14 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -12163,21 +10611,13 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1
 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -12188,21 +10628,13 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -12216,21 +10648,13 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
 define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #4
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -12241,21 +10665,13 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -12269,21 +10685,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8
 define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -12294,22 +10702,14 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -12323,21 +10723,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
 define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #8
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -12348,22 +10740,14 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -12377,21 +10761,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
 define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -12402,22 +10778,14 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -12431,21 +10799,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
 define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -12456,22 +10816,14 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -12485,21 +10837,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
 define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -12510,22 +10854,14 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -12539,21 +10875,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
 define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -12564,22 +10892,14 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -12593,21 +10913,13 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
 define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -12618,22 +10930,14 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -12647,21 +10951,13 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
 define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -12672,22 +10968,14 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -12701,21 +10989,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
 define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -12726,22 +11006,14 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -12755,21 +11027,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2
 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -12780,22 +11044,14 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
-; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
-; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -13341,19 +11597,15 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldrb w8, [x0], #1
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.b v0[1], w8
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
@@ -13365,20 +11617,16 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
 define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i8> %A) {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr b1, [x0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
@@ -13431,19 +11679,15 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
 define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0], #2
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.h v0[1], w8
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -13456,18 +11700,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -13521,19 +11761,15 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr w8, [x0], #4
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.s v0[1], w8
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -13546,18 +11782,14 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -13652,19 +11884,15 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
 define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> %A) {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr s1, [x0], #4
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -13677,18 +11905,14 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -13770,11 +11994,9 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ldr d1, [x3]
-; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    cnt.8b v1, v1
 ; CHECK-SD-NEXT:    uaddlp.4h v1, v1
 ; CHECK-SD-NEXT:    uaddlp.2s v1, v1
@@ -13784,10 +12006,8 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
-; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ldr d1, [x3]
 ; CHECK-GI-NEXT:    cnt.8b v1, v1
 ; CHECK-GI-NEXT:    uaddlp.4h v1, v1
@@ -13980,7 +12200,6 @@ define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr q0, [x0]
-; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -14005,7 +12224,6 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr q0, [x0]
-; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -14026,7 +12244,6 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
 define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
 ; CHECK-SD-LABEL: load_single_extract_variable_index_i32:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x3
 ; CHECK-SD-NEXT:    ldr w0, [x0, x8, lsl #2]
 ; CHECK-SD-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
index 2816f91df44ddc..3b1ff45e19d7ab 100644
--- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
@@ -40,7 +40,6 @@ define i32 @foo(ptr %ptr, i32 %x, i64 %y) !dbg !3 {
 ; CHECK-NEXT:    mov w9, #10 ; =0xa
 ; CHECK-NEXT:    str w9, [x8]
 ; CHECK-NEXT:  LBB0_2: ; %common.ret
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %l = load i32, ptr %ptr, !dbg !4

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index eaa545473b2e0e..cb5a9748e620dd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -351,63 +351,30 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi
 
 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld2lane_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ld2.b { v0, v1 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld2lane_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ld2.b { v0, v1 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld2lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.b { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x2_t  %tmp2
 }
 
 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld3lane_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld3lane_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld3lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x3_t  %tmp2
 }
 
 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld4lane_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld4lane_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld4lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x4_t  %tmp2
 }
@@ -418,63 +385,30 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>
 
 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld2lane_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ld2.h { v0, v1 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld2lane_8h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ld2.h { v0, v1 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld2lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.h { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x2_t  %tmp2
 }
 
 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld3lane_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld3lane_8h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld3lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x3_t  %tmp2
 }
 
 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld4lane_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld4lane_8h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld4lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x4_t  %tmp2
 }
@@ -485,63 +419,30 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>
 
 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld2lane_4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ld2.s { v0, v1 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld2lane_4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld2lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.s { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x2_t  %tmp2
 }
 
 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld3lane_4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld3lane_4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld3lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x3_t  %tmp2
 }
 
 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld4lane_4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld4lane_4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld4lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x4_t  %tmp2
 }
@@ -552,63 +453,30 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>
 
 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld2lane_2d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ld2.d { v0, v1 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld2lane_2d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld2lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.d { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x2_t  %tmp2
 }
 
 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld3lane_2d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld3lane_2d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld3lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x3_t  %tmp2
 }
 
 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-SD-LABEL: ld4lane_2d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld4lane_2d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld4lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x4_t  %tmp2
 }
@@ -1088,17 +956,13 @@ define <1 x i64> @ld1_1d(ptr %p) {
 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 ; CHECK-SD-LABEL: ld1_8b:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ld1_8b:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr b1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i8, ptr %bar
@@ -1109,9 +973,7 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
@@ -1122,9 +984,7 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
@@ -1135,9 +995,7 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index b498611242d469..ddacb5a7647da8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -132,7 +132,6 @@ declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind
 define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -144,7 +143,6 @@ define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64
@@ -312,7 +310,6 @@ declare i64 @llvm.aarch64.ldaxr.p0(ptr) nounwind
 define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -324,7 +321,6 @@ define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_release_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64

diff  --git a/llvm/test/CodeGen/AArch64/arm64-mul.ll b/llvm/test/CodeGen/AArch64/arm64-mul.ll
index 5ae2722693f898..f39869ef0ed261 100644
--- a/llvm/test/CodeGen/AArch64/arm64-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-mul.ll
@@ -138,7 +138,6 @@ entry:
 define i64 @t10(i32 %a) nounwind {
 ; CHECK-LABEL: t10:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    movk w9, #32768, lsl #16

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
index f1678ca19f47d5..b94629ebff3b9d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -515,7 +515,6 @@ entry:
 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmul_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -528,7 +527,6 @@ entry:
 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -543,7 +541,6 @@ entry:
 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -556,7 +553,6 @@ entry:
 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfma_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -569,7 +565,6 @@ entry:
 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmaq_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -584,7 +579,6 @@ entry:
 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfms_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -598,7 +592,6 @@ entry:
 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmsq_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index cb87ba9a4ed6c2..d5d3e807d975ff 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -65,7 +65,6 @@ declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
 define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmla_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -78,7 +77,6 @@ entry:
 define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -91,7 +89,6 @@ entry:
 define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmla_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -104,7 +101,6 @@ entry:
 define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -165,7 +161,6 @@ entry:
 define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmls_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -178,7 +173,6 @@ entry:
 define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -191,7 +185,6 @@ entry:
 define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmls_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -204,7 +197,6 @@ entry:
 define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -265,7 +257,6 @@ entry:
 define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -277,7 +268,6 @@ entry:
 define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -289,7 +279,6 @@ entry:
 define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -301,7 +290,6 @@ entry:
 define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -313,7 +301,6 @@ entry:
 define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -325,7 +312,6 @@ entry:
 define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -337,7 +323,6 @@ entry:
 define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -349,7 +334,6 @@ entry:
 define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -449,7 +433,6 @@ entry:
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfma_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -463,7 +446,6 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -499,7 +481,6 @@ entry:
 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfms_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -512,7 +493,6 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -549,7 +529,6 @@ entry:
 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2d, v1.2d, v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -574,7 +553,6 @@ entry:
 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2d, v1.2d, v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -626,7 +604,6 @@ declare double @llvm.fma.f64(double, double, double)
 define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -675,7 +652,6 @@ entry:
 define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -712,7 +688,6 @@ entry:
 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -725,7 +700,6 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -762,7 +736,6 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -776,7 +749,6 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -816,7 +788,6 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -829,7 +800,6 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -866,7 +836,6 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -880,7 +849,6 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -920,7 +888,6 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -933,7 +900,6 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -970,7 +936,6 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -984,7 +949,6 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1024,7 +988,6 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1037,7 +1000,6 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,7 +1036,6 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1088,7 +1049,6 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1128,7 +1088,6 @@ entry:
 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1140,7 +1099,6 @@ entry:
 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1152,7 +1110,6 @@ entry:
 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1164,7 +1121,6 @@ entry:
 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1176,7 +1132,6 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1189,7 +1144,6 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1202,7 +1156,6 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1215,7 +1168,6 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1320,7 +1272,6 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1333,7 +1284,6 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1346,7 +1296,6 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1360,7 +1309,6 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1374,7 +1322,6 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1387,7 +1334,6 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1400,7 +1346,6 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1414,7 +1359,6 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1428,7 +1372,6 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1440,7 +1383,6 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1474,7 +1416,6 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1487,7 +1428,6 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1524,7 +1464,6 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1536,7 +1475,6 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1567,7 +1505,6 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1579,7 +1516,6 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1610,7 +1546,6 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1622,7 +1557,6 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1653,7 +1587,6 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1665,7 +1598,6 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1696,7 +1628,6 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1708,7 +1639,6 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1739,7 +1669,6 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1751,7 +1680,6 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1782,7 +1710,6 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1794,7 +1721,6 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1825,7 +1751,6 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1837,7 +1762,6 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1868,7 +1792,6 @@ entry:
 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1894,7 +1817,6 @@ entry:
 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1906,7 +1828,6 @@ entry:
 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2047,7 +1968,6 @@ entry:
 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -2059,7 +1979,6 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -2084,7 +2003,6 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2142,7 +2060,6 @@ entry:
 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmla_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2155,7 +2072,6 @@ entry:
 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2168,7 +2084,6 @@ entry:
 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmla_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2181,7 +2096,6 @@ entry:
 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2242,7 +2156,6 @@ entry:
 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmls_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2255,7 +2168,6 @@ entry:
 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2268,7 +2180,6 @@ entry:
 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmls_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2281,7 +2192,6 @@ entry:
 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2342,7 +2252,6 @@ entry:
 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2354,7 +2263,6 @@ entry:
 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2366,7 +2274,6 @@ entry:
 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2378,7 +2285,6 @@ entry:
 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2390,7 +2296,6 @@ entry:
 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2402,7 +2307,6 @@ entry:
 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2414,7 +2318,6 @@ entry:
 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2426,7 +2329,6 @@ entry:
 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2526,7 +2428,6 @@ entry:
 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfma_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2538,7 +2439,6 @@ entry:
 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2572,7 +2472,6 @@ entry:
 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfms_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2585,7 +2484,6 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2645,7 +2543,6 @@ entry:
 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2658,7 +2555,6 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2695,7 +2591,6 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2709,7 +2604,6 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2749,7 +2643,6 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2762,7 +2655,6 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2799,7 +2691,6 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2813,7 +2704,6 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2853,7 +2743,6 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2866,7 +2755,6 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2903,7 +2791,6 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2917,7 +2804,6 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2957,7 +2843,6 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2970,7 +2855,6 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3007,7 +2891,6 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3021,7 +2904,6 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3061,7 +2943,6 @@ entry:
 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3073,7 +2954,6 @@ entry:
 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3085,7 +2965,6 @@ entry:
 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3097,7 +2976,6 @@ entry:
 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3109,7 +2987,6 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3122,7 +2999,6 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3135,7 +3011,6 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3148,7 +3023,6 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3253,7 +3127,6 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3266,7 +3139,6 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3279,7 +3151,6 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3293,7 +3164,6 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3307,7 +3177,6 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3320,7 +3189,6 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3333,7 +3201,6 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3347,7 +3214,6 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3361,7 +3227,6 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3373,7 +3238,6 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3407,7 +3271,6 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3420,7 +3283,6 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3457,7 +3319,6 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3469,7 +3330,6 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3481,7 +3341,6 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3493,7 +3352,6 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3505,7 +3363,6 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3517,7 +3374,6 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3529,7 +3385,6 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3541,7 +3396,6 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3553,7 +3407,6 @@ entry:
 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3565,7 +3418,6 @@ entry:
 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3624,7 +3476,6 @@ entry:
 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3636,7 +3487,6 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3648,7 +3498,6 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3736,7 +3585,6 @@ entry:
 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" {
 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -3748,7 +3596,6 @@ entry:
 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" {
 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-3v
diff .ll b/llvm/test/CodeGen/AArch64/arm64-neon-3v
diff .ll
index 79645e32074c89..2c38ec4bfaf133 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3v
diff .ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3v
diff .ll
@@ -1049,7 +1049,6 @@ entry:
 define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1066,7 +1065,6 @@ entry:
 define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1083,7 +1081,6 @@ entry:
 define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1100,7 +1097,6 @@ entry:
 define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1117,7 +1113,6 @@ entry:
 define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1134,7 +1129,6 @@ entry:
 define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1211,7 +1205,6 @@ entry:
 define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1226,7 +1219,6 @@ entry:
 define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1241,7 +1233,6 @@ entry:
 define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1256,7 +1247,6 @@ entry:
 define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1271,7 +1261,6 @@ entry:
 define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1286,7 +1275,6 @@ entry:
 define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1373,7 +1361,6 @@ entry:
 define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1390,7 +1377,6 @@ entry:
 define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1407,7 +1393,6 @@ entry:
 define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1424,7 +1409,6 @@ entry:
 define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1441,7 +1425,6 @@ entry:
 define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1458,7 +1441,6 @@ entry:
 define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1535,7 +1517,6 @@ entry:
 define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1550,7 +1531,6 @@ entry:
 define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1565,7 +1545,6 @@ entry:
 define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1580,7 +1559,6 @@ entry:
 define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1595,7 +1573,6 @@ entry:
 define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1610,7 +1587,6 @@ entry:
 define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 17fb312c63754d..8f2338706c8177 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -158,7 +158,6 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.8h, v0.8h, v1.8h
 ; CHECK-SD-NEXT:    addp v0.8h, v0.8h, v0.8h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addp_v8i16:
@@ -179,7 +178,6 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    addp v0.16b, v0.16b, v0.16b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addp_v16i8:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index c0d91c1e0c836b..c380648e64046c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -44,9 +44,7 @@ define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
 ; CHECK-LABEL: ins8bw:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[5], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
   ret <8 x i8> %tmp3
@@ -55,9 +53,7 @@ define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
 ; CHECK-LABEL: ins4hw:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
   ret <4 x i16> %tmp3
@@ -66,9 +62,7 @@ define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
 ; CHECK-LABEL: ins2sw:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
   ret <2 x i32> %tmp3
@@ -150,14 +144,12 @@ define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins8b16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v1.b[15], v0.b[2]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins8b16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
 ; CHECK-GI-NEXT:    mov v0.b[15], v2.b[0]
@@ -170,7 +162,6 @@ define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
 ; CHECK-LABEL: ins4h8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.h[7], v0.h[2]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -182,7 +173,6 @@ define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
 ; CHECK-LABEL: ins2s4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -194,7 +184,6 @@ define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
 ; CHECK-LABEL: ins1d2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -206,7 +195,6 @@ define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
 ; CHECK-LABEL: ins2f4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -218,7 +206,6 @@ define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
 ; CHECK-LABEL: ins1f2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -230,7 +217,6 @@ define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
 define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
 ; CHECK-LABEL: ins1f2_args_flipped:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <1 x double> %tmp1, i32 0
@@ -241,7 +227,6 @@ define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1)
 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins16b8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v1.b[7], v0.b[2]
 ; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    ret
@@ -251,7 +236,6 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    mov v0.b[7], v2.b[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
@@ -261,7 +245,6 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
 ; CHECK-LABEL: ins8h4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.h[3], v0.h[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -273,7 +256,6 @@ define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
 ; CHECK-LABEL: ins4s2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -285,7 +267,6 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
 ; CHECK-LABEL: ins2d1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@@ -295,7 +276,6 @@ define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
 ; CHECK-LABEL: ins4f2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -308,7 +288,6 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 ; CHECK-SD-LABEL: ins2f1:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins2f1:
@@ -323,19 +302,15 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins8b8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v1.b[4], v0.b[2]
 ; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins8b8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
@@ -345,8 +320,6 @@ define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
 ; CHECK-LABEL: ins4h4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.h[3], v0.h[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -358,8 +331,6 @@ define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
 ; CHECK-LABEL: ins2s2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -380,8 +351,6 @@ define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
 ; CHECK-LABEL: ins2f2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -452,7 +421,6 @@ define i64 @umovx2d(<2 x i64> %tmp1) {
 define i32 @umovw8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: umovw8b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[7]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
@@ -463,7 +431,6 @@ define i32 @umovw8b(<8 x i8> %tmp1) {
 define i32 @umovw4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: umovw4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
@@ -474,13 +441,11 @@ define i32 @umovw4h(<4 x i16> %tmp1) {
 define i32 @umovw2s(<2 x i32> %tmp1) {
 ; CHECK-SD-LABEL: umovw2s:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: umovw2s:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -489,16 +454,10 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
 }
 
 define i64 @umovx1d(<1 x i64> %tmp1) {
-; CHECK-SD-LABEL: umovx1d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: umovx1d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: umovx1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
   ret i64 %tmp3
 }
@@ -560,7 +519,6 @@ define i64 @smovx4s(<4 x i32> %tmp1) {
 define i32 @smovw8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: smovw8b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[4]
 ; CHECK-NEXT:    add w0, w8, w8
 ; CHECK-NEXT:    ret
@@ -573,7 +531,6 @@ define i32 @smovw8b(<8 x i8> %tmp1) {
 define i32 @smovw4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: smovw4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.h[2]
 ; CHECK-NEXT:    add w0, w8, w8
 ; CHECK-NEXT:    ret
@@ -586,7 +543,6 @@ define i32 @smovw4h(<4 x i16> %tmp1) {
 define i32 @smovx8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: smovx8b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[6]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 6
@@ -597,7 +553,6 @@ define i32 @smovx8b(<8 x i8> %tmp1) {
 define i32 @smovx4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: smovx4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
@@ -608,7 +563,6 @@ define i32 @smovx4h(<4 x i16> %tmp1) {
 define i64 @smovx2s(<2 x i32> %tmp1) {
 ; CHECK-LABEL: smovx2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.s[1]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
@@ -619,19 +573,13 @@ define i64 @smovx2s(<2 x i32> %tmp1) {
 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
 ; CHECK-SD-LABEL: test_vcopy_lane_s8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.b[5], v1.b[3]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_vcopy_lane_s8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b1, v1.b[3]
 ; CHECK-GI-NEXT:    mov v0.b[5], v1.b[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
   ret <8 x i8> %vset_lane
@@ -655,8 +603,6 @@ define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.b[7], v0.b[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -793,7 +739,6 @@ define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[5]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -803,7 +748,6 @@ define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -813,7 +757,6 @@ define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -823,7 +766,6 @@ define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.16b, v0.b[5]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -833,7 +775,6 @@ define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -843,7 +784,6 @@ define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -853,7 +793,6 @@ define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
@@ -869,7 +808,6 @@ define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.16b, v0.b[5]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i8> %shuffle
@@ -884,7 +822,6 @@ define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s16:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.8h, v0.h[2]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i16> %shuffle
@@ -899,7 +836,6 @@ define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.4s, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i32> %shuffle
@@ -1124,18 +1060,11 @@ define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
 }
 
 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv1f64tov8i8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    scvtf d0, d0
-; CHECK-SD-NEXT:    neg v0.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv1f64tov8i8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    scvtf d0, d0
-; CHECK-GI-NEXT:    neg v0.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv1f64tov8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    neg v0.8b, v0.8b
+; CHECK-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
   %sub.i = sub <8 x i8> zeroinitializer, %1
@@ -1143,18 +1072,11 @@ define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
 }
 
 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv1f64tov4i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    scvtf d0, d0
-; CHECK-SD-NEXT:    neg v0.4h, v0.4h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv1f64tov4i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    scvtf d0, d0
-; CHECK-GI-NEXT:    neg v0.4h, v0.4h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv1f64tov4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    neg v0.4h, v0.4h
+; CHECK-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
   %sub.i = sub <4 x i16> zeroinitializer, %1
@@ -1162,18 +1084,11 @@ define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
 }
 
 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
-; CHECK-SD-LABEL: test_bitcastv1f64tov2i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    scvtf d0, d0
-; CHECK-SD-NEXT:    neg v0.2s, v0.2s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_bitcastv1f64tov2i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    scvtf d0, d0
-; CHECK-GI-NEXT:    neg v0.2s, v0.2s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_bitcastv1f64tov2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    neg v0.2s, v0.2s
+; CHECK-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
   %sub.i = sub <2 x i32> zeroinitializer, %1
@@ -1183,7 +1098,6 @@ define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
 ; CHECK-SD-LABEL: test_bitcastv1f64tov1i64:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    scvtf d0, d0
 ; CHECK-SD-NEXT:    neg d0, d0
 ; CHECK-SD-NEXT:    ret
@@ -1204,7 +1118,6 @@ define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    fneg v0.2s, v0.2s
 ; CHECK-NEXT:    ret
@@ -1260,7 +1173,6 @@ define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
 ; CHECK-GI-LABEL: scalar_to_vector.v2i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %b = insertelement <2 x i32> undef, i32 %a, i32 0
   ret <2 x i32> %b
@@ -1297,7 +1209,6 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: testDUP.v1i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1321,7 +1232,6 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 ; CHECK-LABEL: testDUP.v1i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = extractelement <1 x i16> %a, i32 0
@@ -1339,7 +1249,6 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 ; CHECK-LABEL: testDUP.v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
   %b = extractelement <1 x i32> %a, i32 0
@@ -1353,7 +1262,6 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 define <8 x i8> @getl(<16 x i8> %x) #0 {
 ; CHECK-SD-LABEL: getl:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: getl:
@@ -1400,7 +1308,6 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldr h1, [x8]
@@ -1443,7 +1350,6 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    str h0, [x8]
 ; CHECK-SD-NEXT:    ldr d1, [sp, #8]
@@ -1484,13 +1390,11 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: test_dup_v2i32_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_dup_v2i32_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    dup v0.4h, w8
 ; CHECK-GI-NEXT:    ret
@@ -1532,7 +1436,6 @@ entry:
 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
 ; CHECK-SD-LABEL: test_dup_v1i64_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1554,7 +1457,6 @@ entry:
 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
 ; CHECK-SD-LABEL: test_dup_v1i64_v2i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1711,15 +1613,12 @@ declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
 ; CHECK-SD-LABEL: test_concat_undef_v1i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_undef_v1i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %0 = extractelement <2 x i32> %a, i32 0
@@ -1744,7 +1643,6 @@ entry:
 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1763,7 +1661,6 @@ define <2 x i32> @test_concat_
diff _v1i32_v1i32(i32 %a, i32 %b) {
 ; CHECK-NEXT:    sqabs s0, s1
 ; CHECK-NEXT:    fmov w8, s2
 ; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
@@ -1783,9 +1680,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI126_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI126_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1796,14 +1691,12 @@ entry:
 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v16i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI127_0
 ; CHECK-GI-NEXT:    mov v1.b[0], v0.b[0]
@@ -1847,7 +1740,6 @@ entry:
 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1855,7 +1747,6 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov b4, v0.b[2]
 ; CHECK-GI-NEXT:    mov v2.b[1], v3.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[3]
@@ -1925,17 +1816,13 @@ entry:
 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov b4, v0.b[2]
 ; CHECK-GI-NEXT:    mov v2.b[1], v3.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[3]
@@ -2011,9 +1898,7 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI130_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI130_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2024,16 +1909,14 @@ entry:
 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v8i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    adrp x8, .LCPI131_0
 ; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI131_0
 ; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
@@ -2056,14 +1939,12 @@ entry:
 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
@@ -2096,16 +1977,12 @@ entry:
 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
@@ -2144,9 +2021,7 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI134_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2157,16 +2032,14 @@ entry:
 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v4i32_v2i32_v4i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    adrp x8, .LCPI135_0
 ; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI135_0
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI135_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
@@ -2183,14 +2056,12 @@ entry:
 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v2i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    mov v2.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    mov v2.s[3], v1.s[1]
@@ -2211,8 +2082,6 @@ entry:
 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2233,7 +2102,6 @@ entry:
 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -2246,14 +2114,12 @@ entry:
 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v2i64_v2i64_v1i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v2i64_v2i64_v1i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2267,8 +2133,6 @@ entry:
 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2346,7 +2210,6 @@ define <16 x i8> @concat_vector_v16i8_const() {
 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
@@ -2356,7 +2219,6 @@ define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 ; CHECK-LABEL: concat_vector_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
@@ -2366,7 +2228,6 @@ define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: concat_vector_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2382,7 +2243,6 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
@@ -2392,7 +2252,6 @@ define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: concat_vector_v16i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.16b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
index 6327679756739a..d04bac78377bfb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
@@ -7,12 +7,11 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.4s, #2
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT:    mov v1.16b, v3.16b
 ; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
 ; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x0]
 ; CHECK-NEXT:    mov v1.16b, v2.16b
 ; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x1]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, ptr %a)
@@ -25,17 +24,16 @@ entry:
 define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) {
 ; CHECK-LABEL: copyTuple.QTriple:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1
 ; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    mov v1.16b, v31.16b
+; CHECK-NEXT:    ld3 { v1.s, v2.s, v3.s }[1], [x0]
 ; CHECK-NEXT:    mov v2.16b, v31.16b
 ; CHECK-NEXT:    mov v3.16b, v0.16b
-; CHECK-NEXT:    mov v4.16b, v1.16b
-; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x0]
-; CHECK-NEXT:    mov v3.16b, v31.16b
-; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x1]
-; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ld3 { v1.s, v2.s, v3.s }[1], [x1]
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
@@ -48,20 +46,19 @@ entry:
 define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) {
 ; CHECK-LABEL: copyTuple.QQuad:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1_q2
 ; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v1.16b, v0.16b
 ; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov v3.16b, v31.16b
+; CHECK-NEXT:    mov v4.16b, v2.16b
+; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v1.16b, v31.16b
+; CHECK-NEXT:    ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0]
+; CHECK-NEXT:    mov v2.16b, v31.16b
+; CHECK-NEXT:    mov v3.16b, v0.16b
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    mov v5.16b, v1.16b
-; CHECK-NEXT:    mov v6.16b, v2.16b
-; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0]
-; CHECK-NEXT:    mov v4.16b, v31.16b
-; CHECK-NEXT:    mov v5.16b, v0.16b
-; CHECK-NEXT:    mov v6.16b, v0.16b
-; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1]
-; CHECK-NEXT:    mov v0.16b, v3.16b
+; CHECK-NEXT:    ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1]
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index ecf3f69825c0e9..8f6cd8db84257f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -59,8 +59,6 @@ define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: mul1xi64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mul x8, x9, x8
@@ -142,8 +140,6 @@ define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
 define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: sdiv1x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[0]
 ; CHECK-NEXT:    smov w9, v0.b[0]
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -156,8 +152,6 @@ define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: sdiv8x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[1]
 ; CHECK-NEXT:    smov w9, v0.b[1]
 ; CHECK-NEXT:    smov w10, v0.b[0]
@@ -272,8 +266,6 @@ define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: sdiv1x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[0]
 ; CHECK-NEXT:    smov w9, v0.h[0]
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -286,8 +278,6 @@ define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: sdiv4x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[1]
 ; CHECK-NEXT:    smov w9, v0.h[1]
 ; CHECK-NEXT:    smov w10, v0.h[0]
@@ -304,7 +294,6 @@ define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    sdiv w8, w12, w11
 ; CHECK-NEXT:    mov v0.h[2], w10
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = sdiv <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -354,8 +343,6 @@ define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: sdiv1x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -368,8 +355,6 @@ define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: sdiv2x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w10, v0.s[1]
@@ -378,7 +363,6 @@ define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    sdiv w9, w10, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = sdiv <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -411,8 +395,6 @@ define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: sdiv1x64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    sdiv x8, x9, x8
@@ -441,8 +423,6 @@ define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: udiv1x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[0]
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -455,8 +435,6 @@ define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: udiv8x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[1]
 ; CHECK-NEXT:    umov w9, v0.b[1]
 ; CHECK-NEXT:    umov w10, v0.b[0]
@@ -571,8 +549,6 @@ define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: udiv1x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    umov w9, v0.h[0]
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -585,8 +561,6 @@ define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: udiv4x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[1]
 ; CHECK-NEXT:    umov w9, v0.h[1]
 ; CHECK-NEXT:    umov w10, v0.h[0]
@@ -603,7 +577,6 @@ define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    udiv w8, w12, w11
 ; CHECK-NEXT:    mov v0.h[2], w10
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = udiv <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -653,8 +626,6 @@ define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: udiv1x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -667,8 +638,6 @@ define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: udiv2x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w10, v0.s[1]
@@ -677,7 +646,6 @@ define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    udiv w9, w10, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = udiv <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -710,8 +678,6 @@ define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: udiv1x64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    udiv x8, x9, x8
@@ -740,8 +706,6 @@ define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: srem1x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[0]
 ; CHECK-NEXT:    smov w9, v0.b[0]
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -755,8 +719,6 @@ define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: srem8x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w11, v1.b[0]
 ; CHECK-NEXT:    smov w12, v0.b[0]
 ; CHECK-NEXT:    smov w8, v1.b[1]
@@ -916,8 +878,6 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: srem1x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[0]
 ; CHECK-NEXT:    smov w9, v0.h[0]
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -931,8 +891,6 @@ define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: srem4x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w11, v1.h[0]
 ; CHECK-NEXT:    smov w12, v0.h[0]
 ; CHECK-NEXT:    smov w8, v1.h[1]
@@ -953,7 +911,6 @@ define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    msub w8, w9, w17, w18
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = srem <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -1011,8 +968,6 @@ define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: srem1x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -1026,8 +981,6 @@ define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: srem2x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w11, v1.s[1]
@@ -1038,7 +991,6 @@ define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    msub w9, w13, w11, w12
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = srem <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -1075,8 +1027,6 @@ define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: srem1x64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    sdiv x10, x9, x8
@@ -1108,8 +1058,6 @@ define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: urem1x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[0]
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1123,8 +1071,6 @@ define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: urem8x8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w11, v1.b[0]
 ; CHECK-NEXT:    umov w12, v0.b[0]
 ; CHECK-NEXT:    umov w8, v1.b[1]
@@ -1284,8 +1230,6 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: urem1x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    umov w9, v0.h[0]
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1299,8 +1243,6 @@ define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: urem4x16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w11, v1.h[0]
 ; CHECK-NEXT:    umov w12, v0.h[0]
 ; CHECK-NEXT:    umov w8, v1.h[1]
@@ -1321,7 +1263,6 @@ define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    msub w8, w9, w17, w18
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = urem <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -1379,8 +1320,6 @@ define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: urem1x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1394,8 +1333,6 @@ define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: urem2x32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w11, v1.s[1]
@@ -1406,7 +1343,6 @@ define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    msub w9, w13, w11, w12
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = urem <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -1443,8 +1379,6 @@ define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: urem1x64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    udiv x10, x9, x8
@@ -1480,23 +1414,16 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 	%tmp3 = frem <2 x float> %A, %B;
@@ -1514,14 +1441,10 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -1529,7 +1452,6 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s1, v1.s[2]
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -1537,7 +1459,6 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s1, v1.s[3]
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -1571,14 +1492,10 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
 ; CHECK-NEXT:    mov d0, v0.d[1]
 ; CHECK-NEXT:    mov d1, v1.d[1]
 ; CHECK-NEXT:    bl fmod
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    bl fmod
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    add sp, sp, #64

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
index 091cda89bfe403..4c8d9691b30181 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
@@ -4,7 +4,6 @@
 define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -15,7 +14,6 @@ define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
 define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -26,7 +24,6 @@ define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
 define float @test_fmul_lane_ss2S_1_swap(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1_swap:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -112,7 +109,6 @@ declare float @llvm.aarch64.neon.fmulx.f32(float, float)
 define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, s1
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -123,7 +119,6 @@ define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
 define float @test_fmulx_lane_f32_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -207,7 +202,6 @@ define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
 define float @test_fmulx_horizontal_f32(<2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmulx s0, s0, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index cad3fb58086d69..faa537fab3cc3d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -19,8 +19,6 @@ define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
 define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v8i8_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
@@ -58,8 +56,6 @@ define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d
 define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v16i8_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -72,8 +68,6 @@ define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x
 define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v16i8_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -169,8 +163,6 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )
 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
 ; CHECK-NEXT:    ret
@@ -182,8 +174,6 @@ define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1
 define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
@@ -196,8 +186,6 @@ define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2
 define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -248,8 +236,6 @@ define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c,
 define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
 ; CHECK-LABEL: test_select_cc_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -279,8 +265,6 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
 define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    fcmeq v2.4s, v2.4s, v3.4s
 ; CHECK-NEXT:    dup v2.4s, v2.s[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
@@ -293,8 +277,6 @@ define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b
 define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    fcmeq v2.2d, v2.2d, v3.2d
 ; CHECK-NEXT:    dup v2.2d, v2.d[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index 7d87be0ce8e1ca..1036dd28dddb3a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -403,17 +403,13 @@ entry:
 define <8 x i8> @test_vld1_lane_s8(ptr %a, <8 x i8> %b) {
 ; CHECK-GI-LABEL: test_vld1_lane_s8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ld1 { v0.b }[7], [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ;
 ; CHECK-SD-LABEL: test_vld1_lane_s8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr b1, [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.b[7], v1.b[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -424,9 +420,7 @@ entry:
 define <4 x i16> @test_vld1_lane_s16(ptr %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vld1_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.h }[3], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i16, ptr %a, align 2
@@ -437,9 +431,7 @@ entry:
 define <2 x i32> @test_vld1_lane_s32(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vld1_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr %a, align 4
@@ -461,9 +453,7 @@ entry:
 define <2 x float> @test_vld1_lane_f32(ptr %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vld1_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load float, ptr %a, align 4
@@ -606,7 +596,6 @@ entry:
 define void @test_vst1_lane_s8(ptr %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vst1_lane_s8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.b }[7], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -618,7 +607,6 @@ entry:
 define void @test_vst1_lane_s16(ptr %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vst1_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.h }[3], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -628,16 +616,10 @@ entry:
 }
 
 define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) {
-; CHECK-GI-LABEL: test_vst1_lane0_s16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    str h0, [x0]
-; CHECK-GI-NEXT:    ret
-;
-; CHECK-SD-LABEL: test_vst1_lane0_s16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    str h0, [x0]
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_vst1_lane0_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
 entry:
   %0 = extractelement <4 x i16> %b, i32 0
   store i16 %0, ptr %a, align 2
@@ -647,7 +629,6 @@ entry:
 define void @test_vst1_lane_s32(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vst1_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.s }[1], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -657,16 +638,10 @@ entry:
 }
 
 define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) {
-; CHECK-GI-LABEL: test_vst1_lane0_s32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    str s0, [x0]
-; CHECK-GI-NEXT:    ret
-;
-; CHECK-SD-LABEL: test_vst1_lane0_s32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    str s0, [x0]
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_vst1_lane0_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
 entry:
   %0 = extractelement <2 x i32> %b, i32 0
   store i32 %0, ptr %a, align 4
@@ -674,16 +649,10 @@ entry:
 }
 
 define void @test_vst1_lane_s64(ptr %a, <1 x i64> %b) {
-; CHECK-GI-LABEL: test_vst1_lane_s64:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    str d0, [x0]
-; CHECK-GI-NEXT:    ret
-;
-; CHECK-SD-LABEL: test_vst1_lane_s64:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    str d0, [x0]
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_vst1_lane_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
 entry:
   %0 = extractelement <1 x i64> %b, i32 0
   store i64 %0, ptr %a, align 8
@@ -693,7 +662,6 @@ entry:
 define void @test_vst1_lane_f32(ptr %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vst1_lane_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.s }[1], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -703,16 +671,10 @@ entry:
 }
 
 define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) {
-; CHECK-GI-LABEL: test_vst1_lane0_f32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    str s0, [x0]
-; CHECK-GI-NEXT:    ret
-;
-; CHECK-SD-LABEL: test_vst1_lane0_f32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    str s0, [x0]
-; CHECK-SD-NEXT:    ret
+; CHECK-LABEL: test_vst1_lane0_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
 entry:
   %0 = extractelement <2 x float> %b, i32 0
   store float %0, ptr %a, align 4

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
index c9a03c888d5b25..7bc3edf916719f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
@@ -330,7 +330,6 @@ define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
 define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -345,7 +344,6 @@ define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
@@ -360,7 +358,6 @@ define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -375,7 +372,6 @@ define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -390,7 +386,6 @@ define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
@@ -405,7 +400,6 @@ define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -420,7 +414,6 @@ define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
@@ -434,7 +427,6 @@ define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
@@ -448,7 +440,6 @@ define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -462,7 +453,6 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
@@ -476,7 +466,6 @@ define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
@@ -490,7 +479,6 @@ define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -504,7 +492,6 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
@@ -518,7 +505,6 @@ define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
@@ -532,7 +518,6 @@ define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -546,7 +531,6 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
@@ -560,7 +544,6 @@ define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
@@ -574,7 +557,6 @@ define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -588,7 +570,6 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
@@ -602,7 +583,6 @@ define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
@@ -616,7 +596,6 @@ define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -630,7 +609,6 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
@@ -644,7 +622,6 @@ define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
@@ -658,7 +635,6 @@ define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -672,7 +648,6 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
@@ -686,7 +661,6 @@ define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
@@ -700,7 +674,6 @@ define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
index 96d5b5ef85415e..a5990956852af7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
@@ -5,7 +5,6 @@ define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_s8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -16,7 +15,6 @@ define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_s16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -27,7 +25,6 @@ define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_high_s32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -38,7 +35,6 @@ define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_s64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -49,7 +45,6 @@ define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_u8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -60,7 +55,6 @@ define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_u16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -71,7 +65,6 @@ define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_high_u32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -82,7 +75,6 @@ define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_u64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -93,7 +85,6 @@ define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_p64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -104,7 +95,6 @@ define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -115,7 +105,6 @@ define <2 x float> @test_vget_high_f32(<4 x float> %a) {
 ; CHECK-LABEL: test_vget_high_f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
@@ -126,7 +115,6 @@ define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_p8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -137,7 +125,6 @@ define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_p16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -148,7 +135,6 @@ define <1 x double> @test_vget_high_f64(<2 x double> %a) {
 ; CHECK-LABEL: test_vget_high_f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
@@ -158,7 +144,6 @@ entry:
 define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_s8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -168,7 +153,6 @@ entry:
 define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -178,7 +162,6 @@ entry:
 define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_low_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -188,7 +171,6 @@ entry:
 define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_s64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -198,7 +180,6 @@ entry:
 define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_u8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -208,7 +189,6 @@ entry:
 define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_u16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -218,7 +198,6 @@ entry:
 define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_low_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -228,7 +207,6 @@ entry:
 define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_u64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -238,7 +216,6 @@ entry:
 define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_p64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -248,7 +225,6 @@ entry:
 define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -258,7 +234,6 @@ entry:
 define <2 x float> @test_vget_low_f32(<4 x float> %a) {
 ; CHECK-LABEL: test_vget_low_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
@@ -268,7 +243,6 @@ entry:
 define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_p8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -278,7 +252,6 @@ entry:
 define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_p16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -288,7 +261,6 @@ entry:
 define <1 x double> @test_vget_low_f64(<2 x double> %a) {
 ; CHECK-LABEL: test_vget_low_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
index 6c70d19a977a53..eda4e4f743f6e7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -8,8 +8,6 @@
 define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: test_sext_extr_cmp_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    cmp x9, x8
@@ -88,8 +86,6 @@ define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
 define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: test_br_extr_cmp:
 ; CHECK:       // %bb.0: // %common.ret
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    cmp x9, x8
@@ -110,7 +106,6 @@ if.end:
 define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
 ; CHECK-LABEL: test_vselect_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcmp s0, s0
 ; CHECK-NEXT:    cset w8, vs
 ; CHECK-NEXT:    fmov s2, w8
@@ -126,12 +121,9 @@ define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
 define <1 x half> @test_vselect_f16(<1 x half> %i105, <1 x half> %in) {
 ; CHECK-LABEL: test_vselect_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fcvt s2, h0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcmp s2, s2
 ; CHECK-NEXT:    fcsel s0, s1, s0, vs
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %i179 = fcmp uno <1 x half> %i105, zeroinitializer
   %i180 = select <1 x i1> %i179, <1 x half> %in, <1 x half> %i105
@@ -143,11 +135,8 @@ define <1 x half> @test_select_f16(half %a, half %b, <1 x half> %c, <1 x half> %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvt s1, h1
 ; CHECK-NEXT:    fcvt s0, h0
-; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, eq
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cmp31 = fcmp oeq half %a, %b
   %e = select i1 %cmp31, <1 x half> %c, <1 x half> %d

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index 3d52a2c044a0cb..8d141b5995510f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -134,7 +134,6 @@ define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32>
 define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlah_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    sqadd v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -161,7 +160,6 @@ entry:
 define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlah_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    sqadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
@@ -188,7 +186,6 @@ entry:
 define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    sqsub v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -215,7 +212,6 @@ entry:
 define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    sqsub v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
@@ -247,7 +243,6 @@ entry:
 define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd v0.4h, v1.4h, v0.4h
@@ -282,7 +277,6 @@ entry:
 define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd s0, s1, s0
@@ -315,7 +309,6 @@ entry:
 define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqsub v0.4h, v1.4h, v0.4h
@@ -350,7 +343,6 @@ entry:
 define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqsub s0, s1, s0
@@ -500,7 +492,6 @@ define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
 ; CHECK-LABEL: test_sqrdmlah_extract_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmulh v0.4h, v1.4h, v0.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd v0.4h, v1.4h, v0.4h
@@ -648,7 +639,6 @@ define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlah v1.4h, v2.4h, v0.h[3]
 ; CHECK-NEXT:    umov w0, v1.h[0]
 ; CHECK-NEXT:    ret
@@ -666,7 +656,6 @@ define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlah s1, s2, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s1
 ; CHECK-NEXT:    ret
@@ -788,7 +777,6 @@ define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlsh v1.4h, v2.4h, v0.h[3]
 ; CHECK-NEXT:    umov w0, v1.h[0]
 ; CHECK-NEXT:    ret
@@ -806,7 +794,6 @@ define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlsh s1, s2, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s1
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
index 80314765abdd1f..141d8d884f3374 100644
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -8,7 +8,6 @@ define void @test(ptr %p1, i32 %v1) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    fmov.2d v0, #2.00000000
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr s0, [x8]
@@ -28,7 +27,6 @@ define void @test2(ptr %p1, i32 %v1) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movi.16b v0, #63
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr s0, [x8]
@@ -78,7 +76,6 @@ define <4 x float> @testv4i16(<2 x float> %l1) {
 ; CHECK-LABEL: testv4i16:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.4h v1, #16
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.d v1[1], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index 0030e9ce80abb4..25e5194d429a7c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -39,7 +39,6 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-LABEL: cnt32_advsimd_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    cnt.8b v0, v0
@@ -65,7 +64,6 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ;
 ; CHECK-CSSC-LABEL: cnt32_advsimd_2:
 ; CHECK-CSSC:       // %bb.0:
-; CHECK-CSSC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CSSC-NEXT:    fmov w8, s0
 ; CHECK-CSSC-NEXT:    cnt w0, w8
 ; CHECK-CSSC-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..3deabfa66b36d3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -88,7 +88,6 @@ entry:
 define i64 @test_rev_x_srl32(i32 %a) {
 ; CHECK-SD-LABEL: test_rev_x_srl32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    rev x8, x0
 ; CHECK-SD-NEXT:    lsr x0, x8, #32
 ; CHECK-SD-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index da6499b7daa82e..304f1b6632c2eb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -275,7 +275,6 @@ entry:
 define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
 ; CHECK-LABEL: extendedLeftShiftintToint64By4:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #4, #32
 ; CHECK-NEXT:    add x0, x8, #16
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 5806bcf0dacf16..1591360f04ddf8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -601,7 +601,6 @@ if.end:                                           ; preds = %for.body, %if.else
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; ENABLE-LABEL: callVariadicFunc:
 ; ENABLE:       ; %bb.0: ; %entry
-; ENABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; ENABLE-NEXT:    cbz w0, LBB8_2
 ; ENABLE-NEXT:  ; %bb.1: ; %if.then
 ; ENABLE-NEXT:    sub sp, sp, #64
@@ -631,7 +630,6 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
 ; DISABLE-NEXT:    .cfi_offset w30, -8
 ; DISABLE-NEXT:    .cfi_offset w29, -16
-; DISABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; DISABLE-NEXT:    cbz w0, LBB8_2
 ; DISABLE-NEXT:  ; %bb.1: ; %if.then
 ; DISABLE-NEXT:    stp x1, x1, [sp, #32]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-stp.ll b/llvm/test/CodeGen/AArch64/arm64-stp.ll
index a393fcb17fcb2b..dbf6e5656aaa84 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp.ll
@@ -139,7 +139,6 @@ define void @nosplat_v4i32(i32 %v, ptr %p) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    str w0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index a6a825b26b3b52..94397e2eac0dc9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -948,7 +948,6 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
 define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
 ; CHECK-SD-LABEL: sext_v1x64:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    asr x1, x0, #63
 ; CHECK-SD-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index a854cb7fec9917..13684cc68b016a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -21,121 +21,55 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
 }
 
 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK-SD-LABEL: tbl2_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v2
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl2_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v2
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl2_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.8b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK-SD-LABEL: tbl2_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl2_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl2_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-SD-LABEL: tbl3_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl3_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl3_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-SD-LABEL: tbl3_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl3_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl3_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-SD-LABEL: tbl4_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl4_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl4_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-SD-LABEL: tbl4_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbl4_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbl4_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
@@ -173,32 +107,22 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI8_0]
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v4
 ; CHECK-SD-NEXT:    tbl.8b v1, { v2, v3 }, v4
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI8_1
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr d4, [x8, :lo12:.LCPI8_1]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.8b v1, { v2, v3 }, v4
 ; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI8_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
@@ -262,23 +186,15 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI9_0
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI9_1
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI9_1]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
@@ -331,11 +247,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s4, w0
 ; CHECK-SD-NEXT:    mov w8, #32 // =0x20
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[1], w0
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[2], w0
 ; CHECK-SD-NEXT:    mov.b v4[3], w0
 ; CHECK-SD-NEXT:    mov.b v4[4], w0
@@ -364,10 +276,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
 ; CHECK-GI-NEXT:    mov.b v4[3], w0
@@ -452,11 +360,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #1 // =0x1
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    fmov s4, w8
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[1], w8
 ; CHECK-SD-NEXT:    mov.b v4[2], w8
 ; CHECK-SD-NEXT:    mov.b v4[3], w8
@@ -486,11 +390,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #1 // =0x1
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    fmov s4, w8
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    mov.b v4[1], w8
 ; CHECK-GI-NEXT:    mov.b v4[2], w8
 ; CHECK-GI-NEXT:    mov.b v4[3], w8
@@ -595,11 +495,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    movi.2d v4, #0xffffffffffffffff
 ; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI12_0]
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    tbl.16b v2, { v2, v3 }, v5
 ; CHECK-SD-NEXT:    mov.b v4[0], w0
 ; CHECK-SD-NEXT:    mov.b v4[1], w0
@@ -619,11 +515,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
 ; CHECK-GI-NEXT:    adrp x9, .LCPI12_1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x9, :lo12:.LCPI12_1]
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    tbl.16b v2, { v2, v3 }, v5
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
@@ -742,10 +634,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    dup.16b v4, w0
 ; CHECK-SD-NEXT:    mov w8, #255 // =0xff
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    mov.b v4[8], w8
 ; CHECK-SD-NEXT:    mov.b v4[9], w8
 ; CHECK-SD-NEXT:    mov.b v4[10], w8
@@ -765,10 +653,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
 ; CHECK-GI-NEXT:    mov.b v4[3], w0
@@ -870,23 +754,15 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI14_0]
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
@@ -972,24 +848,16 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_2
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI15_2]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI15_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
@@ -1076,24 +944,16 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8>
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_0]
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_2
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI16_2]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI16_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
@@ -1135,121 +995,55 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
 }
 
 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-SD-LABEL: tbx2_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
-; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2 }, v3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx2_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
-; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2 }, v3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx2_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.8b v0, { v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-SD-LABEL: tbx2_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
-; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2 }, v3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx2_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
-; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2 }, v3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx2_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.16b v0, { v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-SD-LABEL: tbx3_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx3_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx3_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-SD-LABEL: tbx3_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx3_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
-; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx3_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
-; CHECK-SD-LABEL: tbx4_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx4_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx4_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
-; CHECK-SD-LABEL: tbx4_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: tbx4_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
-; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: tbx4_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
   ret <16 x i8> %tmp3
 }

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 38a568ac919168..b6dc0c83c5ac93 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1267,7 +1267,6 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1275,7 +1274,6 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.8h, v1.8h, v2.8h
 ; CHECK-GI-NEXT:    shrn2 v0.16b, v1.8h, #8
 ; CHECK-GI-NEXT:    ret
@@ -1293,7 +1291,6 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1301,7 +1298,6 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.4s, v1.4s, v2.4s
 ; CHECK-GI-NEXT:    shrn2 v0.8h, v1.4s, #16
 ; CHECK-GI-NEXT:    ret
@@ -1319,7 +1315,6 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1327,7 +1322,6 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.2d, v1.2d, v2.2d
 ; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #32
 ; CHECK-GI-NEXT:    ret
@@ -1445,7 +1439,6 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1453,7 +1446,6 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.8h, v1.8h, v2.8h
 ; CHECK-GI-NEXT:    shrn2 v0.16b, v1.8h, #8
 ; CHECK-GI-NEXT:    ret
@@ -1471,7 +1463,6 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1479,7 +1470,6 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.4s, v1.4s, v2.4s
 ; CHECK-GI-NEXT:    shrn2 v0.8h, v1.4s, #16
 ; CHECK-GI-NEXT:    ret
@@ -1497,7 +1487,6 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1505,7 +1494,6 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.2d, v1.2d, v2.2d
 ; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #32
 ; CHECK-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
index adfe28ea95893b..4a23b3a984eb62 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -18,9 +18,7 @@ define <8 x i8> @test_vaddv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
 ; CHECK-LABEL: test_vaddv_s8_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv b1, v1.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[3], v1.b[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a2)
@@ -45,9 +43,7 @@ define <4 x i16> @test_vaddv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
 ; CHECK-LABEL: test_vaddv_s16_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv h1, v1.4h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a2)
@@ -72,9 +68,7 @@ define <2 x i32> @test_vaddv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
 ; CHECK-LABEL: test_vaddv_s32_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp v1.2s, v1.2s, v1.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a2)
@@ -121,9 +115,7 @@ define <8 x i8> @test_vaddv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
 ; CHECK-LABEL: test_vaddv_u8_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv b1, v1.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[3], v1.b[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a2)
@@ -160,9 +152,7 @@ define <4 x i16> @test_vaddv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
 ; CHECK-LABEL: test_vaddv_u16_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv h1, v1.4h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a2)
@@ -199,9 +189,7 @@ define <2 x i32> @test_vaddv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
 ; CHECK-LABEL: test_vaddv_u32_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp v1.2s, v1.2s, v1.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a2)
@@ -267,7 +255,6 @@ define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
 ; CHECK-LABEL: test_vaddv_u64_to_vec:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp d0, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index d4cc154ac6afc0..e5764a0a478816 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -256,7 +256,6 @@ define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
 define half @test_vcvt_f16_f32(<1 x float> %x) {
 ; GENERIC-LABEL: test_vcvt_f16_f32:
 ; GENERIC:       // %bb.0:
-; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvt h0, s0
 ; GENERIC-NEXT:    ret
 ;
@@ -283,7 +282,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 ; GENERIC-LABEL: test_vcvt_high_f32_f64:
 ; GENERIC:       // %bb.0:
-; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
 ; GENERIC-NEXT:    ret
 ;
@@ -297,7 +295,6 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
 ;
 ; GISEL-LABEL: test_vcvt_high_f32_f64:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
 ; GISEL-NEXT:    ret
   %cvt = fptrunc <2 x double> %v to <2 x float>
@@ -322,7 +319,6 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 ; GENERIC-LABEL: test_vcvtx_high_f32_f64:
 ; GENERIC:       // %bb.0:
-; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; GENERIC-NEXT:    ret
 ;
@@ -336,7 +332,6 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
 ;
 ; GISEL-LABEL: test_vcvtx_high_f32_f64:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; GISEL-NEXT:    ret
   %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
index 94074d1689f6a2..4e167b3c7ce1cd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -5,7 +5,6 @@ define void @test0f(ptr nocapture %x, float %a) #0 {
 ; CHECK-LABEL: test0f:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -19,7 +18,6 @@ define void @test1f(ptr nocapture %x, float %a) #0 {
 ; CHECK-LABEL: test1f:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov.4s v1, #1.00000000
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -106,7 +104,6 @@ define <16 x i8> @test_insert_v16i8_insert_2_undef_base_
diff erent_valeus(i8 %a,
 define <8 x half> @test_insert_v8f16_insert_1(half %a) {
 ; CHECK-LABEL: test_insert_v8f16_insert_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[0]
 ; CHECK-NEXT:    mov.h v0[7], wzr
 ; CHECK-NEXT:    ret
@@ -125,7 +122,6 @@ define <8 x half> @test_insert_v8f16_insert_2(half %a) {
 ; CHECK-LABEL: test_insert_v8f16_insert_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov.h v1[1], v0[0]
 ; CHECK-NEXT:    mov.h v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -201,7 +197,6 @@ define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
 ; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d1, #0000000000000000
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[1], v0[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -212,7 +207,6 @@ define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
 define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
 ; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    mov.s v0[3], wzr
 ; CHECK-NEXT:    ret
@@ -225,7 +219,6 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
 define <4 x float> @test_insert_3_f32_undef(float %a) {
 ; CHECK-LABEL: test_insert_3_f32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
@@ -238,7 +231,6 @@ define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
 ; CHECK-LABEL: test_insert_2_f32_undef_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    mov.s v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -252,7 +244,6 @@ define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
 ; CHECK-LABEL: test_insert_v2f64_undef_insert1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.d v1[0], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
 ; CHECK-NEXT:    ret
@@ -263,7 +254,6 @@ define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
 define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
 ; CHECK-LABEL: test_insert_2_f32_var:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    mov.s v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -286,9 +276,7 @@ define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
 define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
 ; CHECK-LABEL: test_insert_v4f16_f16_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.h v0[0], wzr
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
   ret <4 x half> %v.0
@@ -306,9 +294,7 @@ define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
 define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
 ; CHECK-LABEL: test_insert_v2f32_f32_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.s v0[0], wzr
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
   ret <2 x float> %v.0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 499786470d4ac1..ebb7062a1faff5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -918,7 +918,6 @@ define <2 x double> @fmls_commuted_neg_2d(ptr %A, ptr %B, ptr %C) nounwind {
 define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmls_indexed_2s:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls.2s v0, v2, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -955,7 +954,6 @@ entry:
 define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_2s:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $d2
 ; CHECK-NEXT:    fmla.2s v0, v1, v2
 ; CHECK-NEXT:    ret
 entry:
@@ -968,7 +966,6 @@ entry:
 define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_4s:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.4s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -983,7 +980,6 @@ entry:
 define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_2d:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -996,7 +992,6 @@ entry:
 define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmls_indexed_2s_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls.2s v0, v2, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1033,7 +1028,6 @@ entry:
 define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_2s_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.2s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1046,7 +1040,6 @@ entry:
 define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_4s_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.4s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1061,7 +1054,6 @@ entry:
 define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_2d_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1080,7 +1072,6 @@ declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x
 define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: mul_4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1101,7 +1092,6 @@ define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: mul_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1138,7 +1128,6 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
 define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
 ; CHECK-LABEL: fmul_lane_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
@@ -1191,7 +1180,6 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
 define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
 ; CHECK-LABEL: fmulx_lane_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
@@ -1222,7 +1210,6 @@ define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
 define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqdmulh_lane_4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1243,7 +1230,6 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqdmulh_lane_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1276,7 +1262,6 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
 define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqrdmulh_lane_4h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1297,7 +1282,6 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqrdmulh_lane_2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1330,7 +1314,6 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
 define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqdmull_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1341,7 +1324,6 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqdmull_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1374,7 +1356,6 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: umull_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1385,7 +1366,6 @@ define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: umull_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1396,7 +1376,6 @@ define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: smull_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1407,7 +1386,6 @@ define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: smull_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1418,7 +1396,6 @@ define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: smlal_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1431,7 +1408,6 @@ define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: smlal_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1444,7 +1420,6 @@ define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: sqdmlal_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1457,7 +1432,6 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun
 define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: sqdmlal_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1498,7 +1472,6 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
 ; CHECK-NEXT:    fmov s2, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlal.h s2, h1, v0[1]
 ; CHECK-NEXT:    fmov w0, s2
 ; CHECK-NEXT:    ret
@@ -1516,7 +1489,6 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
 ; CHECK-NEXT:    fmov s2, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlsl.h s2, h1, v0[1]
 ; CHECK-NEXT:    fmov w0, s2
 ; CHECK-NEXT:    ret
@@ -1566,7 +1538,6 @@ define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d1, x0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlal.s d1, s2, v0[1]
 ; CHECK-NEXT:    fmov x0, d1
 ; CHECK-NEXT:    ret
@@ -1583,7 +1554,6 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d1, x0
 ; CHECK-NEXT:    fmov s2, w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlsl.s d1, s2, v0[1]
 ; CHECK-NEXT:    fmov x0, d1
 ; CHECK-NEXT:    ret
@@ -1598,7 +1568,6 @@ declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
 define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: umlal_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1611,7 +1580,6 @@ define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: umlal_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1625,7 +1593,6 @@ define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: smlsl_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1638,7 +1605,6 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: smlsl_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1651,7 +1617,6 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: sqdmlsl_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1664,7 +1629,6 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun
 define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: sqdmlsl_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1703,7 +1667,6 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nou
 define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: umlsl_lane_4s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1716,7 +1679,6 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: umlsl_lane_2d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1873,7 +1835,6 @@ define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind {
 define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull2.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1888,7 +1849,6 @@ entry:
 define <4 x i32> @foo6a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo6a:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1903,7 +1863,6 @@ entry:
 define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1918,7 +1877,6 @@ entry:
 define <2 x i64> @foo7a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo7a:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1934,7 +1892,6 @@ entry:
 define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull2.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1949,7 +1906,6 @@ entry:
 define <4 x i32> @foo8a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo8a:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1964,7 +1920,6 @@ entry:
 define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo9:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1979,7 +1934,6 @@ entry:
 define <2 x i64> @foo9a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo9a:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -2090,7 +2044,6 @@ define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
 define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 ; CHECK-LABEL: mlal2_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2.4s v0, v1, v2[3]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -2108,7 +2061,6 @@ define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 ; CHECK-LABEL: mlal2_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2126,7 +2078,6 @@ define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 ; CHECK-LABEL: mlal2_4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2.4s v0, v1, v2[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -2144,7 +2095,6 @@ define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 ; CHECK-LABEL: mlal2_5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer
@@ -2163,7 +2113,6 @@ define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmulq_n_f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul.2d v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2176,7 +2125,6 @@ entry:
 define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmulq_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul.4s v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2191,7 +2139,6 @@ entry:
 define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmul_n_f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul.2s v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2400,7 +2347,6 @@ define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: vmul_built_dup_fromsmall_test:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.4h v0, v0, v1[3]
 ; CHECK-NEXT:    ret
   %vget_lane = extractelement <4 x i16> %b, i32 3
@@ -2415,7 +2361,6 @@ define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
 define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: vmulq_built_dup_fromsmall_test:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.8h v0, v0, v1[0]
 ; CHECK-NEXT:    ret
   %vget_lane = extractelement <4 x i16> %b, i32 0
@@ -2519,7 +2464,6 @@ define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
 define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) {
 ; CHECK-LABEL: pmull_from_extract_duplane_low:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup.8b v1, v1[0]
 ; CHECK-NEXT:    pmull.8h v0, v0, v1
 ; CHECK-NEXT:    ret
@@ -2533,7 +2477,6 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
 define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
 ; CHECK-LABEL: pmull_from_extract_duplane_high:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup.16b v1, v1[0]
 ; CHECK-NEXT:    pmull2.8h v0, v0, v1
 ; CHECK-NEXT:    ret
@@ -2633,7 +2576,6 @@ define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x floa
 define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
 ; CHECK-LABEL: scalar_fmla_from_extract_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.s s0, s1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs = extractelement <2 x float> %rvec, i32 1
@@ -2655,7 +2597,6 @@ define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x floa
 define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
 ; CHECK-LABEL: scalar_fmls_from_extract_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.s s0, s1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs.scal = extractelement <2 x float> %rvec, i32 1
@@ -2703,7 +2644,6 @@ define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x
 define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
 ; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.2s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
@@ -2726,7 +2666,6 @@ define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x
 define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
 ; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
@@ -2853,8 +2792,6 @@ declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
 define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
 ; CHECK-LABEL: test_mul_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mul x8, x9, x8

diff  --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 9955b253f563e9..b542bca716d9b2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -219,8 +219,6 @@ define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-LABEL: combine_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -230,8 +228,6 @@ define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine2_v16i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -251,8 +247,6 @@ define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
 define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-LABEL: combine_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -262,8 +256,6 @@ define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
 define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-SD-LABEL: combine2_v8i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -283,8 +275,6 @@ define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
 define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
 ; CHECK-LABEL: combine_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.4s v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -294,8 +284,6 @@ define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
 define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
 ; CHECK-SD-LABEL: combine2_v4i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.4s v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -315,8 +303,6 @@ define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
 define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-LABEL: combine_v16i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -326,8 +312,6 @@ define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine2_v16i8_undef:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -347,8 +331,6 @@ define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-LABEL: combine_v8i16_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -359,20 +341,18 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
 define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine_v8i16_8first:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
+; CHECK-SD-NEXT:    fmov d31, d1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI25_0
-; CHECK-SD-NEXT:    fmov d2, d0
-; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
-; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI25_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v31, v0 }, v1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: combine_v8i16_8first:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-GI-NEXT:    fmov d2, d0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
-; CHECK-GI-NEXT:    fmov d31, d1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
-; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v1, v2 }, v0
 ; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
   ret <16 x i8> %3
@@ -383,20 +363,18 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine_v8i16_8firstundef:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
+; CHECK-SD-NEXT:    fmov d31, d1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
-; CHECK-SD-NEXT:    fmov d2, d0
-; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
-; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v31, v0 }, v1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: combine_v8i16_8firstundef:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-GI-NEXT:    fmov d2, d0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
-; CHECK-GI-NEXT:    fmov d31, d1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
-; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v1, v2 }, v0
 ; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
   ret <16 x i8> %3

diff  --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
index ad073d96a14830..cc70dc446324da 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
@@ -43,7 +43,6 @@ define i8 @test_valid_wrap_optimizable2(ptr %base, i32 %offset) {
 ; CHECK-LABEL: test_valid_wrap_optimizable2:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    mov w8, #-100 ; =0xffffff9c
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldrb w0, [x8, w1, sxtw]
 ; CHECK-NEXT:    ret
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 6aeeeed94543d3..6e7398b63e6d8f 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -410,7 +410,6 @@ define %T2 @simple_struct(%T1 %0, %T2 %1, %T3, %T4) nounwind {
 ; CHECK-NEXT:     ldr     x8, [x2]
 ; CHECK-NEXT:     ldr     x10, [x3]
 ; CHECK-NEXT:     ldr     d1, [x2, #8]
-; CHECK-NEXT:                                     // kill: def $w1 killed $w1 killed $x1
 ; CHECK-NEXT:     ldr     s0, [sp, #12]
 ; CHECK-NEXT:     ldr     d2, [x3, #8]
 ; CHECK-NEXT:     mov     x2, x8
@@ -511,7 +510,6 @@ define <4 x i8> @small_vector(<4 x i8> %0) {
 ; CHECK-NEXT:     str	w0, [sp, #12]
 ; CHECK-NEXT:     ldr	s0, [sp, #12]
 ; CHECK-NEXT:     ushll	v0.8h, v0.8b, #0
-; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     blr	x9
 ; CHECK-NEXT:     uzp1	v0.8b, v0.8b, v0.8b
 ; CHECK-NEXT:     adrp	x9, __os_arm64x_dispatch_ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index dcc675839b714b..e7c6c643d8adeb 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -424,7 +424,6 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind;
 ; CHECK-NEXT:     str     x8, [sp, #40]
 ; CHECK-NEXT:     mov     x0, x8
 ; CHECK-NEXT:     ldr     s0, [sp, #44]
-; CHECK-NEXT:                                     // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldp     x29, x30, [sp, #96]             // 16-byte Folded Reload
 ; CHECK-NEXT:     .seh_save_fplr  96
@@ -477,7 +476,6 @@ declare <4 x i8> @small_vector(<4 x i8> %0) nounwind;
 ; CHECK-NEXT:     stur	w8, [x29, #-8]
 ; CHECK-NEXT:     ldur	s0, [x29, #-8]
 ; CHECK-NEXT:     ushll	v0.8h, v0.8b, #0
-; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldp	x29, x30, [sp, #48]             // 16-byte Folded Reload
 ; CHECK-NEXT:     .seh_save_fplr	48

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index 70f3b5cc488ea8..884dd3ac9860b7 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -562,7 +562,6 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -590,7 +589,6 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -882,7 +880,6 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -910,7 +907,6 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7230,7 +7226,6 @@ define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7258,7 +7253,6 @@ define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7390,7 +7384,6 @@ define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -7418,7 +7411,6 @@ define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -7550,7 +7542,6 @@ define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7578,7 +7569,6 @@ define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7710,7 +7700,6 @@ define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -7738,7 +7727,6 @@ define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -7870,7 +7858,6 @@ define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7898,7 +7885,6 @@ define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8030,7 +8016,6 @@ define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8058,7 +8043,6 @@ define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8190,7 +8174,6 @@ define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -8218,7 +8201,6 @@ define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -8350,7 +8332,6 @@ define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8378,7 +8359,6 @@ define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8510,7 +8490,6 @@ define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -8538,7 +8517,6 @@ define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -8670,7 +8648,6 @@ define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8698,7 +8675,6 @@ define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
index fdb14606d463b7..bbf0afec219b00 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -374,7 +374,6 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -392,7 +391,6 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB21_1: // %atomicrmw.start
@@ -420,7 +418,6 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-NEXT:    stlxr w10, w8, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB22_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset release
    ret i32 %old
@@ -616,7 +613,6 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB32_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
    ret i8 %old
@@ -636,7 +632,6 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB33_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
    ret i16 %old
@@ -695,7 +690,6 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stlxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB36_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
    ret i8 %old
@@ -715,7 +709,6 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB37_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
    ret i16 %old
@@ -763,7 +756,6 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
@@ -777,11 +769,9 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-NEXT:    stxrb w10, w1, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB40_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB40_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
@@ -791,7 +781,6 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
@@ -806,11 +795,9 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-NEXT:    cbnz w10, .LBB41_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.success
 ; CHECK-NEXT:    dmb ish
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
@@ -833,11 +820,9 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NEXT:    stlxr w10, w1, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB42_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic
    %old = extractvalue { i32, i1 } %pair, 0

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index 679065529090f0..a7311154c4f5d7 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -558,7 +558,6 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; INLINE_ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -585,7 +584,6 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; INLINE_ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
@@ -621,7 +619,6 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxr w10, w8, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB22_1
 ; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32:
@@ -833,7 +830,6 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB32_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
    ret i8 %old
@@ -853,7 +849,6 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB33_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
    ret i16 %old
@@ -911,7 +906,6 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stlxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB36_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
    ret i8 %old
@@ -931,7 +925,6 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB37_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
    ret i16 %old
@@ -978,7 +971,6 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
@@ -992,11 +984,9 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stxrb w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB40_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB40_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
@@ -1015,7 +1005,6 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xffff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
@@ -1029,11 +1018,9 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxrh w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB41_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
@@ -1065,11 +1052,9 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxr w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB42_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB42_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32:

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index ed9c1b037d0cc7..7d1f901af1a15d 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -18,7 +18,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
@@ -38,7 +37,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
@@ -83,7 +81,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -106,7 +103,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
@@ -126,7 +122,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
@@ -171,7 +166,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -182,7 +176,6 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -202,15 +195,13 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -231,7 +222,6 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
@@ -271,7 +261,6 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
@@ -281,7 +270,6 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -301,15 +289,13 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -330,7 +316,6 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
@@ -370,7 +355,6 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
@@ -444,7 +428,6 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
@@ -699,7 +682,6 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4:
@@ -814,7 +796,6 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB8_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
@@ -937,7 +918,6 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
-; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
@@ -991,10 +971,10 @@ define <2 x double> @test_atomicrmw_fadd_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x7, x3
-; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    fmov x4, d2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 888b795876f7df..2e60365903ef27 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -20,7 +20,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
@@ -40,7 +39,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
@@ -85,7 +83,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -108,7 +105,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
@@ -128,7 +124,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
@@ -173,7 +168,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -184,7 +178,6 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -204,15 +197,13 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -233,7 +224,6 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
@@ -273,7 +263,6 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
@@ -283,7 +272,6 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -303,15 +291,13 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -332,7 +318,6 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
@@ -372,7 +357,6 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 4
@@ -446,7 +430,6 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
@@ -531,7 +514,6 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
 define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fcvt s0, h0
 ; NOLSE-NEXT:    fcvt s1, h1
@@ -556,7 +538,6 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fcvt s2, h0
 ; LSE-NEXT:    ldr s0, [x0]
@@ -579,7 +560,6 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB6_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
@@ -652,7 +632,6 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fmov w10, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
@@ -665,16 +644,15 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; NOLSE-NEXT:    ldaxr w9, [x0]
 ; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    lsl w11, w9, #16
 ; NOLSE-NEXT:    mov h3, v2.h[1]
-; NOLSE-NEXT:    fmov w11, s2
-; NOLSE-NEXT:    lsl w11, w11, #16
 ; NOLSE-NEXT:    fmov w10, s3
 ; NOLSE-NEXT:    fmov s3, w11
 ; NOLSE-NEXT:    lsl w10, w10, #16
 ; NOLSE-NEXT:    fmaxnm s3, s3, s1
 ; NOLSE-NEXT:    fmov s2, w10
-; NOLSE-NEXT:    fmaxnm s2, s2, s0
 ; NOLSE-NEXT:    fmov w11, s3
+; NOLSE-NEXT:    fmaxnm s2, s2, s0
 ; NOLSE-NEXT:    ubfx w13, w11, #16, #1
 ; NOLSE-NEXT:    add w11, w11, w8
 ; NOLSE-NEXT:    fmov w10, s2
@@ -696,11 +674,10 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fmov w10, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr s0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w10, w10, #16
 ; LSE-NEXT:    fmov w9, s1
 ; LSE-NEXT:    fmov s2, w10
@@ -738,7 +715,6 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w11, w9
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
@@ -861,7 +837,6 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
-; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
@@ -915,10 +890,10 @@ define <2 x double> @test_atomicrmw_fmax_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fmaxnm v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x7, x3
-; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    fmov x4, d2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index a3665c6e428608..c2b6fa0dcb7a43 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -20,7 +20,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align2:
@@ -40,7 +39,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align2:
@@ -85,7 +83,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -108,7 +105,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align4:
@@ -128,7 +124,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align4:
@@ -173,7 +168,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -184,7 +178,6 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -204,15 +197,13 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -233,7 +224,6 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
@@ -273,7 +263,6 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 2
@@ -283,7 +272,6 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -303,15 +291,13 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -332,7 +318,6 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
@@ -372,7 +357,6 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 4
@@ -446,7 +430,6 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
@@ -531,7 +514,6 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
 define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fcvt s0, h0
 ; NOLSE-NEXT:    fcvt s1, h1
@@ -556,7 +538,6 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fcvt s2, h0
 ; LSE-NEXT:    ldr s0, [x0]
@@ -579,7 +560,6 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB6_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
@@ -652,7 +632,6 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fmov w10, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
@@ -665,16 +644,15 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; NOLSE-NEXT:    ldaxr w9, [x0]
 ; NOLSE-NEXT:    fmov s2, w9
+; NOLSE-NEXT:    lsl w11, w9, #16
 ; NOLSE-NEXT:    mov h3, v2.h[1]
-; NOLSE-NEXT:    fmov w11, s2
-; NOLSE-NEXT:    lsl w11, w11, #16
 ; NOLSE-NEXT:    fmov w10, s3
 ; NOLSE-NEXT:    fmov s3, w11
 ; NOLSE-NEXT:    lsl w10, w10, #16
 ; NOLSE-NEXT:    fminnm s3, s3, s1
 ; NOLSE-NEXT:    fmov s2, w10
-; NOLSE-NEXT:    fminnm s2, s2, s0
 ; NOLSE-NEXT:    fmov w11, s3
+; NOLSE-NEXT:    fminnm s2, s2, s0
 ; NOLSE-NEXT:    ubfx w13, w11, #16, #1
 ; NOLSE-NEXT:    add w11, w11, w8
 ; NOLSE-NEXT:    fmov w10, s2
@@ -696,11 +674,10 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fmov w10, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr s0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w10, w10, #16
 ; LSE-NEXT:    fmov w9, s1
 ; LSE-NEXT:    fmov s2, w10
@@ -738,7 +715,6 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w11, w9
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
@@ -861,7 +837,6 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
-; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
@@ -915,10 +890,10 @@ define <2 x double> @test_atomicrmw_fmin_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fminnm v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x7, x3
-; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    fmov x4, d2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 7725ce0e731859..3a55b54b16c8d9 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -18,7 +18,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align2:
@@ -38,7 +37,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align2:
@@ -83,7 +81,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -106,7 +103,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align4:
@@ -126,7 +122,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align4:
@@ -171,7 +166,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -182,7 +176,6 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -202,15 +195,13 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -231,7 +222,6 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
@@ -271,7 +261,6 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 2
@@ -281,7 +270,6 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -301,15 +289,13 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    ldr h0, [x0]
+; LSE-NEXT:    mov w8, #32767 // =0x7fff
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -330,7 +316,6 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
@@ -370,7 +355,6 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 4
@@ -444,7 +428,6 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
@@ -699,7 +682,6 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_v2f16_seq_cst_align4:
@@ -814,7 +796,6 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB8_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
-; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_v2bf16_seq_cst_align4:
@@ -937,7 +918,6 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
-; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
@@ -991,10 +971,10 @@ define <2 x double> @test_atomicrmw_fsub_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fsub v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x7, x3
-; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x5, v2.d[1]
 ; LSE-NEXT:    fmov x4, d2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
index 98033a8e449ffb..597b58b6979d74 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -1,11 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
 
 define half @test_rmw_xchg_f16(ptr %dst, half %new) {
 ; NOLSE-LABEL: test_rmw_xchg_f16:
 ; NOLSE:       // %bb.0:
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w8, s0
 ; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -14,16 +13,13 @@ define half @test_rmw_xchg_f16(ptr %dst, half %new) {
 ; NOLSE-NEXT:    cbnz w10, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
 ; NOLSE-NEXT:    fmov s0, w9
-; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_rmw_xchg_f16:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w8, s0
 ; LSE-NEXT:    swpalh w8, w8, [x0]
 ; LSE-NEXT:    fmov s0, w8
-; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
   %res = atomicrmw xchg ptr %dst, half %new seq_cst
   ret half %res
@@ -103,8 +99,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) {
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
-; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    mov x4, x6
+; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x2, x3, [x0]
 ; LSE-NEXT:    cmp x5, x7
 ; LSE-NEXT:    ccmp x4, x6, #0, eq

diff  --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
index 33997614598c3a..40d0d7cbc923bb 100644
--- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
@@ -5,8 +5,6 @@
 define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fadd:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -21,13 +19,10 @@ define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fadd:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -44,8 +39,6 @@ define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fsub:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -60,13 +53,10 @@ define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fsub:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -83,8 +73,6 @@ define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fmul:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -99,13 +87,10 @@ define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmul:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -122,12 +107,9 @@ define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fmadd:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w10, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -150,16 +132,12 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmadd:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
-; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    lsl w9, w9, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -183,8 +161,6 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 define bfloat @test_fdiv(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fdiv:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -199,13 +175,10 @@ define bfloat @test_fdiv(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fdiv:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -223,8 +196,6 @@ define bfloat @test_frem(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_frem:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -239,15 +210,12 @@ define bfloat @test_frem(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_frem:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -321,11 +289,8 @@ define bfloat @test_tailcall_flipped(bfloat %a, bfloat %b) #0 {
 define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) #0 {
 ; CHECK-LABEL: test_select:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = select i1 %c, bfloat %a, bfloat %b
   ret bfloat %r
@@ -334,19 +299,14 @@ define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) #0 {
 define bfloat @test_select_cc(bfloat %a, bfloat %b, bfloat %c, bfloat %d) #0 {
 ; CHECK-LABEL: test_select_cc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fmov w8, s3
 ; CHECK-NEXT:    fmov w9, s2
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    lsl w9, w9, #16
 ; CHECK-NEXT:    fmov s2, w8
 ; CHECK-NEXT:    fmov s3, w9
 ; CHECK-NEXT:    fcmp s3, s2
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cc = fcmp une bfloat %c, %d
   %r = select i1 %cc, bfloat %a, bfloat %b
@@ -356,8 +316,6 @@ define bfloat @test_select_cc(bfloat %a, bfloat %b, bfloat %c, bfloat %d) #0 {
 define float @test_select_cc_f32_f16(float %a, float %b, bfloat %c, bfloat %d) #0 {
 ; CHECK-LABEL: test_select_cc_f32_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fmov w8, s3
 ; CHECK-NEXT:    fmov w9, s2
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -376,10 +334,7 @@ define bfloat @test_select_cc_f16_f32(bfloat %a, bfloat %b, float %c, float %d)
 ; CHECK-LABEL: test_select_cc_f16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcmp s2, s3
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cc = fcmp une float %c, %d
   %r = select i1 %cc, bfloat %a, bfloat %b
@@ -389,8 +344,6 @@ define bfloat @test_select_cc_f16_f32(bfloat %a, bfloat %b, float %c, float %d)
 define i1 @test_fcmp_une(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_une:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -407,8 +360,6 @@ define i1 @test_fcmp_une(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ueq(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ueq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -426,8 +377,6 @@ define i1 @test_fcmp_ueq(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ugt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ugt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -444,8 +393,6 @@ define i1 @test_fcmp_ugt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_uge(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_uge:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -462,8 +409,6 @@ define i1 @test_fcmp_uge(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ult(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ult:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -480,8 +425,6 @@ define i1 @test_fcmp_ult(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ule(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ule:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -498,8 +441,6 @@ define i1 @test_fcmp_ule(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_uno(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_uno:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -516,8 +457,6 @@ define i1 @test_fcmp_uno(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_one(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -535,8 +474,6 @@ define i1 @test_fcmp_one(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_oeq(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_oeq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -553,8 +490,6 @@ define i1 @test_fcmp_oeq(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ogt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ogt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -571,8 +506,6 @@ define i1 @test_fcmp_ogt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_oge(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_oge:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -589,8 +522,6 @@ define i1 @test_fcmp_oge(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_olt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_olt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -607,8 +538,6 @@ define i1 @test_fcmp_olt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ole(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ole:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -625,8 +554,6 @@ define i1 @test_fcmp_ole(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ord(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ord:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -643,7 +570,6 @@ define i1 @test_fcmp_ord(bfloat %a, bfloat %b) #0 {
 define void @test_fccmp(bfloat %in, ptr %out) {
 ; CHECK-LABEL: test_fccmp:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    movi v1.2s, #69, lsl #24
 ; CHECK-NEXT:    movi v3.2s, #72, lsl #24
@@ -667,8 +593,6 @@ define void @test_fccmp(bfloat %in, ptr %out) {
 define void @test_br_cc(bfloat %a, bfloat %b, ptr %p1, ptr %p2) #0 {
 ; CHECK-LABEL: test_br_cc:
 ; CHECK:       // %bb.0: // %common.ret
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -725,7 +649,6 @@ declare i1 @test_dummy(ptr %p1) #0
 define i32 @test_fptosi_i32(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptosi_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -738,7 +661,6 @@ define i32 @test_fptosi_i32(bfloat %a) #0 {
 define i64 @test_fptosi_i64(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptosi_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -751,7 +673,6 @@ define i64 @test_fptosi_i64(bfloat %a) #0 {
 define i32 @test_fptoui_i32(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptoui_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -764,7 +685,6 @@ define i32 @test_fptoui_i32(bfloat %a) #0 {
 define i64 @test_fptoui_i64(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptoui_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -786,7 +706,6 @@ define bfloat @test_uitofp_i32(i32 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i32:
@@ -821,7 +740,6 @@ define bfloat @test_uitofp_i64(i64 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i64:
@@ -856,7 +774,6 @@ define bfloat @test_sitofp_i32(i32 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i32:
@@ -895,7 +812,6 @@ define bfloat @test_sitofp_i64(i64 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i64:
@@ -927,7 +843,6 @@ define bfloat @test_uitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    ucvtf d1, w0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fcvtxn s1, d1
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    ubfx w10, w9, #16, #1
@@ -948,13 +863,11 @@ define bfloat @test_uitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i32_fadd:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    ucvtf d1, w0
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fcvtxn s1, d1
@@ -976,7 +889,6 @@ define bfloat @test_sitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    scvtf d1, w0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fcvtxn s1, d1
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    ubfx w10, w9, #16, #1
@@ -997,13 +909,11 @@ define bfloat @test_sitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i32_fadd:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    scvtf d1, w0
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fcvtxn s1, d1
@@ -1033,7 +943,6 @@ define bfloat @test_fptrunc_float(float %a) #0 {
 ; CHECK-CVT-NEXT:    csel w8, w9, w8, vs
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fptrunc_float:
@@ -1055,7 +964,6 @@ define bfloat @test_fptrunc_double(double %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fptrunc_double:
@@ -1070,7 +978,6 @@ define bfloat @test_fptrunc_double(double %a) #0 {
 define float @test_fpext_float(bfloat %a) #0 {
 ; CHECK-LABEL: test_fpext_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -1082,7 +989,6 @@ define float @test_fpext_float(bfloat %a) #0 {
 define double @test_fpext_double(bfloat %a) #0 {
 ; CHECK-LABEL: test_fpext_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -1095,7 +1001,6 @@ define double @test_fpext_double(bfloat %a) #0 {
 define i16 @test_bitcast_bfloattoi16(bfloat %a) #0 {
 ; CHECK-LABEL: test_bitcast_bfloattoi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %r = bitcast bfloat %a to i16
@@ -1106,7 +1011,6 @@ define bfloat @test_bitcast_i16tobfloat(i16 %a) #0 {
 ; CHECK-LABEL: test_bitcast_i16tobfloat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = bitcast i16 %a to bfloat
   ret bfloat %r
@@ -1148,7 +1052,6 @@ declare bfloat @llvm.fmuladd.f16(bfloat %a, bfloat %b, bfloat %c) #0
 define bfloat @test_sqrt(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sqrt:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1160,12 +1063,10 @@ define bfloat @test_sqrt(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sqrt:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1180,7 +1081,6 @@ define bfloat @test_powi(bfloat %a, i32 %b) #0 {
 ; CHECK-CVT-LABEL: test_powi:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1192,14 +1092,12 @@ define bfloat @test_powi(bfloat %a, i32 %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_powi:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1216,7 +1114,6 @@ define bfloat @test_sin(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sin:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1228,14 +1125,12 @@ define bfloat @test_sin(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sin:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1251,7 +1146,6 @@ define bfloat @test_cos(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_cos:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1263,14 +1157,12 @@ define bfloat @test_cos(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_cos:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1286,7 +1178,6 @@ define bfloat @test_tan(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_tan:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1298,14 +1189,12 @@ define bfloat @test_tan(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_tan:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1321,7 +1210,6 @@ define bfloat @test_acos(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_acos:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1333,14 +1221,12 @@ define bfloat @test_acos(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_acos:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1356,7 +1242,6 @@ define bfloat @test_asin(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_asin:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1368,14 +1253,12 @@ define bfloat @test_asin(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_asin:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1391,7 +1274,6 @@ define bfloat @test_atan(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_atan:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1403,14 +1285,12 @@ define bfloat @test_atan(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_atan:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1426,8 +1306,6 @@ define bfloat @test_atan2(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_atan2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -1442,15 +1320,12 @@ define bfloat @test_atan2(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_atan2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1469,7 +1344,6 @@ define bfloat @test_cosh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_cosh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1481,14 +1355,12 @@ define bfloat @test_cosh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_cosh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1504,7 +1376,6 @@ define bfloat @test_sinh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sinh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1516,14 +1387,12 @@ define bfloat @test_sinh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sinh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1539,7 +1408,6 @@ define bfloat @test_tanh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_tanh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1551,14 +1419,12 @@ define bfloat @test_tanh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_tanh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1574,8 +1440,6 @@ define bfloat @test_pow(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_pow:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -1590,15 +1454,12 @@ define bfloat @test_pow(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_pow:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1617,7 +1478,6 @@ define bfloat @test_exp(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_exp:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1629,14 +1489,12 @@ define bfloat @test_exp(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_exp:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1652,7 +1510,6 @@ define bfloat @test_exp2(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_exp2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1664,14 +1521,12 @@ define bfloat @test_exp2(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_exp2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1687,7 +1542,6 @@ define bfloat @test_log(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1699,14 +1553,12 @@ define bfloat @test_log(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1722,7 +1574,6 @@ define bfloat @test_log10(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log10:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1734,14 +1585,12 @@ define bfloat @test_log10(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log10:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1757,7 +1606,6 @@ define bfloat @test_log2(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1769,14 +1617,12 @@ define bfloat @test_log2(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1791,9 +1637,6 @@ define bfloat @test_log2(bfloat %a) #0 {
 define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fma:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s2
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
@@ -1811,14 +1654,10 @@ define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fma:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s2
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    fmov w10, s0
@@ -1838,11 +1677,9 @@ define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 define bfloat @test_fabs(bfloat %a) #0 {
 ; CHECK-LABEL: test_fabs:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w8, w8, #0x7fff
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = call bfloat @llvm.fabs.f16(bfloat %a)
   ret bfloat %r
@@ -1851,8 +1688,6 @@ define bfloat @test_fabs(bfloat %a) #0 {
 define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_minnum:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -1867,13 +1702,10 @@ define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_minnum:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1890,8 +1722,6 @@ define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_maxnum:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -1906,13 +1736,10 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_maxnum:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1929,8 +1756,6 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1942,13 +1767,10 @@ define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1966,25 +1788,20 @@ define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 define bfloat @test_copysign_f32(bfloat %a, float %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_f32:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
-; CHECK-CVT-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
 ; CHECK-CVT-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign_f32:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
-; CHECK-BF16-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
 ; CHECK-BF16-NEXT:    bif v0.16b, v1.16b, v2.16b
@@ -1998,7 +1815,6 @@ define bfloat @test_copysign_f32(bfloat %a, float %b) #0 {
 define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_f64:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fcvt s1, d1
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -2008,12 +1824,10 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign_f64:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fcvt s1, d1
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -2032,8 +1846,6 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_extended:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -2050,8 +1862,6 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ;
 ; CHECK-BF16-LABEL: test_copysign_extended:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -2060,7 +1870,6 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ; CHECK-BF16-NEXT:    fmov s0, w8
 ; CHECK-BF16-NEXT:    fmov s1, w9
 ; CHECK-BF16-NEXT:    bit v0.16b, v1.16b, v2.16b
-; CHECK-BF16-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %b)
   %xr = fpext bfloat %r to float
@@ -2070,7 +1879,6 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 define bfloat @test_floor(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_floor:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2082,12 +1890,10 @@ define bfloat @test_floor(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_floor:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2101,7 +1907,6 @@ define bfloat @test_floor(bfloat %a) #0 {
 define bfloat @test_ceil(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_ceil:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2113,12 +1918,10 @@ define bfloat @test_ceil(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_ceil:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2132,7 +1935,6 @@ define bfloat @test_ceil(bfloat %a) #0 {
 define bfloat @test_trunc(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_trunc:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2144,12 +1946,10 @@ define bfloat @test_trunc(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_trunc:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2163,7 +1963,6 @@ define bfloat @test_trunc(bfloat %a) #0 {
 define bfloat @test_rint(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_rint:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2175,12 +1974,10 @@ define bfloat @test_rint(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_rint:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2194,7 +1991,6 @@ define bfloat @test_rint(bfloat %a) #0 {
 define bfloat @test_nearbyint(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_nearbyint:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2206,12 +2002,10 @@ define bfloat @test_nearbyint(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_nearbyint:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2225,7 +2019,6 @@ define bfloat @test_nearbyint(bfloat %a) #0 {
 define bfloat @test_round(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_round:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2237,12 +2030,10 @@ define bfloat @test_round(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_round:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2256,7 +2047,6 @@ define bfloat @test_round(bfloat %a) #0 {
 define bfloat @test_roundeven(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_roundeven:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2268,12 +2058,10 @@ define bfloat @test_roundeven(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_roundeven:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2287,12 +2075,9 @@ define bfloat @test_roundeven(bfloat %a) #0 {
 define bfloat @test_fmuladd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fmuladd:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w10, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -2315,16 +2100,12 @@ define bfloat @test_fmuladd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmuladd:
 ; CHECK-BF16:       // %bb.0:
-; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
-; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
-; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    lsl w9, w9, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8

diff  --git a/llvm/test/CodeGen/AArch64/bf16-select.ll b/llvm/test/CodeGen/AArch64/bf16-select.ll
index e3479f49e86b61..17bb8446545fdb 100644
--- a/llvm/test/CodeGen/AArch64/bf16-select.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-select.ll
@@ -7,11 +7,8 @@
 define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) {
 ; CHECK-BASE-LABEL: test_select:
 ; CHECK-BASE:       // %bb.0:
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BASE-NEXT:    cmp w0, #0
-; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select:
@@ -27,10 +24,7 @@ define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) {
 ; CHECK-BASE-LABEL: test_select_fcc:
 ; CHECK-BASE:       // %bb.0:
 ; CHECK-BASE-NEXT:    fcmp s2, s3
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select_fcc:
@@ -46,11 +40,8 @@ define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) {
 define bfloat @test_select_icc(bfloat %a, bfloat %b, i32 %c, i32 %d) {
 ; CHECK-BASE-LABEL: test_select_icc:
 ; CHECK-BASE:       // %bb.0:
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BASE-NEXT:    cmp w0, w1
-; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select_icc:

diff  --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
index d59de3c56f4ee2..408cef22b91dd4 100644
--- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
@@ -99,7 +99,6 @@ entry:
 define dso_local <4 x bfloat> @test_vmov_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vmov_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -114,7 +113,6 @@ entry:
 define dso_local <8 x bfloat> @test_vmovq_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vmovq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -129,7 +127,6 @@ entry:
 define dso_local <4 x bfloat> @test_vdup_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vdup_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -144,7 +141,6 @@ entry:
 define dso_local <8 x bfloat> @test_vdupq_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vdupq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -159,7 +155,6 @@ entry:
 define dso_local <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_vdup_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -170,7 +165,6 @@ entry:
 define dso_local <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -202,7 +196,6 @@ define dso_local <4 x bfloat> @test_vext_aligned_bf16(<8 x bfloat> %a) {
 ; CHECK-LABEL: test_vext_aligned_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -213,7 +206,6 @@ define dso_local <4 x bfloat> @test_vext_unaligned_bf16(<8 x bfloat> %a) {
 ; CHECK-LABEL: test_vext_unaligned_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -224,14 +216,11 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step0_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    mov v3.16b, v2.16b
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT:    adrp x8, .LCPI16_1
-; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
+; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI16_1]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -242,14 +231,11 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step1_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    mov v3.16b, v2.16b
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI17_0]
 ; CHECK-NEXT:    adrp x8, .LCPI17_1
-; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
+; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI17_1]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
@@ -260,14 +246,11 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step2_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    mov v3.16b, v2.16b
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    adrp x8, .LCPI18_1
-; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
+; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
@@ -344,7 +327,6 @@ entry:
 define <8 x bfloat> @test_shufflevector8xbfloat(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_shufflevector8xbfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
index 9b6e19eba3f4e6..762def573cd175 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
@@ -22,7 +22,6 @@ define <4 x bfloat> @add_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -62,7 +61,6 @@ define <4 x bfloat> @sub_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fsub v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -91,7 +89,6 @@ define <4 x bfloat> @mul_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fmul v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -120,7 +117,6 @@ define <4 x bfloat> @div_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -168,7 +164,6 @@ define <4 x bfloat> @s_to_h(<4 x float> %a) {
 ; CHECK-BF16-LABEL: s_to_h:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = fptrunc <4 x float> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -196,7 +191,6 @@ define <4 x bfloat> @d_to_h(<4 x double> %a) {
 ; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = fptrunc <4 x double> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -262,7 +256,6 @@ define <4 x bfloat> @sitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -286,7 +279,6 @@ define <4 x bfloat> @sitofp_i16(<4 x i16> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i16> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -309,7 +301,6 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i32> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -342,7 +333,6 @@ define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
 ; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i64> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -368,7 +358,6 @@ define <4 x bfloat> @uitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -393,7 +382,6 @@ define <4 x bfloat> @uitofp_i16(<4 x i16> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i16> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -416,7 +404,6 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i32> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -449,7 +436,6 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
 ; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i64> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -458,7 +444,6 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
 define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <4 x bfloat> undef, bfloat %a, i64 0

diff  --git a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
index c03e2e5321321a..8eb8423bf75171 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
@@ -336,7 +336,6 @@ define <4 x bfloat> @sitofp_v4i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -564,7 +563,6 @@ define <4 x bfloat> @uitofp_v4i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
-; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -777,7 +775,6 @@ define <8 x bfloat> @uitofp_i64(<8 x i64> %a) #0 {
 define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <8 x bfloat> undef, bfloat %a, i64 0

diff  --git a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
index 222d7435ff7420..b0e84fb6742c9c 100644
--- a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
@@ -16,7 +16,6 @@ entry:
 define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
 ; CHECK-LABEL: test_vdup_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -29,7 +28,6 @@ entry:
 define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
 ; CHECK-LABEL: test_vdupq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -42,7 +40,6 @@ entry:
 define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vdup_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -54,7 +51,6 @@ entry:
 define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -88,8 +84,6 @@ entry:
 define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
 ; CHECK-LABEL: test_vcombine_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -102,7 +96,6 @@ define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
 ; CHECK-LABEL: test_vget_high_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -113,7 +106,6 @@ entry:
 define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
 ; CHECK-LABEL: test_vget_low_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -124,7 +116,6 @@ entry:
 define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vget_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -147,8 +138,6 @@ entry:
 define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vset_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov v1.h[1], v0.h[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -161,7 +150,6 @@ entry:
 define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vsetq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov v1.h[7], v0.h[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -174,7 +162,6 @@ entry:
 define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vduph_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -197,10 +184,7 @@ entry:
 define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_lane_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[3]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
@@ -211,10 +195,7 @@ entry:
 define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_lane_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
@@ -225,7 +206,6 @@ entry:
 define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -238,7 +218,6 @@ entry:
 define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[6], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -251,9 +230,7 @@ entry:
 define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[7]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vgetq_lane = extractelement <8 x bfloat> %b, i32 7
@@ -265,9 +242,7 @@ entry:
 define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[4]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vgetq_lane = extractelement <8 x bfloat> %b, i32 4

diff  --git a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
index 864ddc2967c185..9dd505359fcd9c 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
@@ -7,7 +7,6 @@ define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: bitcast_v2i16_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %y = bitcast <2 x half> %x to <2 x i16>
   ret <2 x i16> %y

diff  --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 39f2572d9fd354..3f5edaefdd1fe8 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -89,7 +89,6 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add i32 %a, %b
   %d = bitcast i32 %c to <4 x i8>
@@ -127,7 +126,6 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
 ; CHECK-SD-NEXT:    add w8, w0, w1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: bitcast_i32_v2i16:
@@ -138,7 +136,6 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add i32 %a, %b
   %d = bitcast i32 %c to <2 x i16>
@@ -407,7 +404,6 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-SD-NEXT:    strh w8, [sp, #14]
 ; CHECK-SD-NEXT:    ldr s0, [sp, #12]
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #16
 ; CHECK-SD-NEXT:    ret
 ;
@@ -424,7 +420,6 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add <2 x i16> %a, %b
   %d = bitcast <2 x i16> %c to <4 x i8>
@@ -443,7 +438,6 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-SD-NEXT:    orr x8, x8, #0x2
 ; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #16
 ; CHECK-SD-NEXT:    ret
 ;
@@ -456,7 +450,6 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add <4 x i8> %a, %b
   %d = bitcast <4 x i8> %c to <2 x i16>
@@ -506,12 +499,12 @@ define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT:    add v3.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT:    mov x8, v2.d[1]
-; CHECK-GI-NEXT:    mov x9, v3.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    add v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    ret
@@ -567,12 +560,12 @@ define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v2.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT:    add v3.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT:    mov x8, v2.d[1]
-; CHECK-GI-NEXT:    mov x9, v3.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
+; CHECK-GI-NEXT:    add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT:    add v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    ret
@@ -609,18 +602,18 @@ define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v4.4s, v0.4s, v4.4s
-; CHECK-GI-NEXT:    add v5.4s, v1.4s, v5.4s
-; CHECK-GI-NEXT:    add v6.4s, v2.4s, v6.4s
-; CHECK-GI-NEXT:    add v7.4s, v3.4s, v7.4s
-; CHECK-GI-NEXT:    mov x8, v4.d[1]
-; CHECK-GI-NEXT:    mov x9, v5.d[1]
-; CHECK-GI-NEXT:    mov x10, v6.d[1]
-; CHECK-GI-NEXT:    mov x11, v7.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v4.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v5.d[0]
-; CHECK-GI-NEXT:    mov v2.d[0], v6.d[0]
-; CHECK-GI-NEXT:    mov v3.d[0], v7.d[0]
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    add v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT:    add v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT:    add v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov x10, v2.d[1]
+; CHECK-GI-NEXT:    mov x11, v3.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
+; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    mov v2.d[1], x10

diff  --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index eefb862c5313cf..7adfafc28aa9cc 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -546,7 +546,6 @@ define i32 @test9(i64 %b, i32 %e) {
 ; CHECK-NEXT:    lsr x0, x0, #12
 ; CHECK-NEXT:    lsr w8, w1, #23
 ; CHECK-NEXT:    bfi w0, w8, #23, #9
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %c = lshr i64 %b, 12
   %d = trunc i64 %c to i32

diff  --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index e86f55d63f754b..08bbad6ab69b5a 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -25,7 +25,6 @@ declare i16 @llvm.bswap.i16(i16)
 define i64 @bswap_i16_to_i64_anyext(i16 %a) {
 ; CHECK-SD-LABEL: bswap_i16_to_i64_anyext:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    rev16 x8, x0
 ; CHECK-SD-NEXT:    lsl x0, x8, #48
 ; CHECK-SD-NEXT:    ret
@@ -179,7 +178,6 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    rev16 v0.8b, v0.8b
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
@@ -210,7 +208,6 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    rev w8, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <1 x i32> @llvm.bswap.v1i32(<1 x i32> %a)

diff  --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index ac37fbc349d7d7..0b71ff2ecfcdf3 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -9,7 +9,6 @@ define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.b[7], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7
   ret <8 x i8> %v
@@ -30,7 +29,6 @@ define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.h[3], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3
   ret <4 x i16> %v
@@ -51,7 +49,6 @@ define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.s[1], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1
   ret <2 x i32> %v
@@ -81,9 +78,7 @@ define <2 x float> @v2f32z(float %t, float %s) nounwind {
 ; CHECK-LABEL: v2f32z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d0, #0000000000000000
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1
   ret <2 x float> %v
@@ -93,7 +88,6 @@ define <4 x float> @v4f32z(float %t, float %s) nounwind {
 ; CHECK-LABEL: v4f32z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3
@@ -104,7 +98,6 @@ define <2 x double> @v2f64z(double %t, double %s) nounwind {
 ; CHECK-LABEL: v2f64z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1
@@ -119,7 +112,6 @@ define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.b[7], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 7
   ret <8 x i8> %v
@@ -140,7 +132,6 @@ define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.h[3], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 3
   ret <4 x i16> %v
@@ -161,7 +152,6 @@ define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.s[1], w1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x i32> <i32 -1, i32 undef>, i32 %s, i32 1
   ret <2 x i32> %v
@@ -277,7 +267,6 @@ define void @v2f32st(ptr %p, float %s) nounwind {
 ; CHECK-LABEL: v2f32st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.2s, #64, lsl #24
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    ret
@@ -290,7 +279,6 @@ define void @v4f32st(ptr %p, float %s) nounwind {
 ; CHECK-LABEL: v4f32st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #192, lsl #24
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -303,7 +291,6 @@ define void @v2f64st(ptr %p, double %s) nounwind {
 ; CHECK-LABEL: v2f64st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov v1.2d, #2.00000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -320,7 +307,6 @@ define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1f
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    mov w10, #30 // =0x1e

diff  --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
index 36b1b2cdcb4320..6bfb9adc8a60d1 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
@@ -631,7 +631,6 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
 ; CHECK-LABEL: larger_bv_than_source:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
index dbbfbea9176f6e..7f793b7ca550a6 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
@@ -101,7 +101,6 @@ define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    ld1r { v1.8b }, [x1]
 ; CHECK-NEXT:    ld1r { v0.8b }, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -120,7 +119,6 @@ define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    ld1r { v1.8b }, [x0]
 ; CHECK-NEXT:    ld1r { v0.8b }, [x1]
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -193,7 +191,6 @@ define <8 x i8> @test11(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1

diff  --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index 8fbed8bfdb3fd8..27666f06e81426 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -143,7 +143,6 @@ B:
 define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: f_i32_sign_extend_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    csel x8, x1, x2, ge
@@ -167,7 +166,6 @@ B:
 define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: g_i32_sign_extend_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    csel x8, x1, x2, lt

diff  --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index 1cc194e77b94b1..e87d43161a895e 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -31,7 +31,6 @@ entry:
 define i1 @test_EQ_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_EQ_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -45,7 +44,6 @@ entry:
 define i1 @test_EQ_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_EQ_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -110,7 +108,6 @@ entry:
 define i1 @test_EQ_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IslEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -167,7 +164,6 @@ entry:
 define i1 @test_EQ_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IclEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -249,7 +245,6 @@ entry:
 define i1 @test_NE_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_NE_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -263,7 +258,6 @@ entry:
 define i1 @test_NE_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_NE_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -328,7 +322,6 @@ entry:
 define i1 @test_NE_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IslEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -385,7 +378,6 @@ entry:
 define i1 @test_NE_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IclEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 186d191444feb6..b7817ebe59b9b5 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -53,7 +53,6 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 ; CHECK-LABEL: test_return_bool:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:  LBB1_1: ; %cmpxchg.start
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxrb w9, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/combine-andintoload.ll b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
index f0b8fef848998f..1d016a406d6b4f 100644
--- a/llvm/test/CodeGen/AArch64/combine-andintoload.ll
+++ b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
@@ -254,14 +254,12 @@ define i64 @load8_and16_sext(ptr %p, i8 %y) {
 ; CHECK-LABEL: load8_and16_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x0, x1, x8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load8_and16_sext:
 ; CHECKBE:       // %bb.0:
 ; CHECKBE-NEXT:    ldrb w8, [x0]
-; CHECKBE-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECKBE-NEXT:    and x0, x1, x8
 ; CHECKBE-NEXT:    ret
   %x = load i8, ptr %p, align 4

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index a5c64c0982d0f8..b62e2f1fa945f2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -14,29 +14,15 @@ target triple = "aarch64"
 
 ; Expected to not transform
 define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-SD-LABEL: complex_add_v2f16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov h2, v0.h[1]
-; CHECK-SD-NEXT:    mov h3, v1.h[1]
-; CHECK-SD-NEXT:    fsub h1, h1, h2
-; CHECK-SD-NEXT:    fadd h0, h3, h0
-; CHECK-SD-NEXT:    mov v1.h[1], v0.h[0]
-; CHECK-SD-NEXT:    fmov d0, d1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: complex_add_v2f16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov h2, v0.h[1]
-; CHECK-GI-NEXT:    mov h3, v1.h[1]
-; CHECK-GI-NEXT:    fsub h1, h1, h2
-; CHECK-GI-NEXT:    fadd h0, h3, h0
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: complex_add_v2f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov h2, v0.h[1]
+; CHECK-NEXT:    mov h3, v1.h[1]
+; CHECK-NEXT:    fsub h1, h1, h2
+; CHECK-NEXT:    fadd h0, h3, h0
+; CHECK-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
 entry:
   %a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
   %a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
index afcdb76067f433..8f84314f07918b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
@@ -7,15 +7,12 @@ target triple = "aarch64"
 define <2 x half> @complex_mul_v2f16(<2 x half> %a, <2 x half> %b) {
 ; CHECK-LABEL: complex_mul_v2f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h2, v0.h[1]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h3, h0, v1.h[1]
 ; CHECK-NEXT:    fmul h2, h2, v1.h[1]
 ; CHECK-NEXT:    fmla h3, h1, v0.h[1]
 ; CHECK-NEXT:    fnmsub h0, h1, h0, h2
 ; CHECK-NEXT:    mov v0.h[1], v3.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %a.real   = shufflevector <2 x half> %a, <2 x half> poison, <1 x i32> <i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
index 039025dafa0d6e..eae724870fb9dd 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
@@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> %
 ; CHECK-NEXT:    fmul v17.2s, v6.2s, v5.2s
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    fmul v5.2s, v4.2s, v5.2s
-; CHECK-NEXT:    fmla v17.2s, v1.2s, v4.2s
 ; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #0
+; CHECK-NEXT:    fmla v17.2s, v1.2s, v4.2s
 ; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    fneg v16.2s, v5.2s
 ; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #90
@@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p
 ; CHECK-NEXT:    ld2 { v0.2s, v1.2s }, [x0]
 ; CHECK-NEXT:    ld2 { v2.2s, v3.2s }, [x1]
 ; CHECK-NEXT:    fmul v4.2s, v3.2s, v1.2s
-; CHECK-NEXT:    fmul v6.2s, v2.2s, v1.2s
+; CHECK-NEXT:    fmul v1.2s, v2.2s, v1.2s
 ; CHECK-NEXT:    fneg v4.2s, v4.2s
-; CHECK-NEXT:    fmla v6.2s, v0.2s, v3.2s
+; CHECK-NEXT:    fmla v1.2s, v0.2s, v3.2s
 ; CHECK-NEXT:    fmla v4.2s, v0.2s, v2.2s
 ; CHECK-NEXT:    str d4, [x4]
 ; CHECK-NEXT:    ldr q5, [x2]
-; CHECK-NEXT:    ext v7.16b, v5.16b, v5.16b, #8
-; CHECK-NEXT:    zip1 v0.2s, v5.2s, v7.2s
-; CHECK-NEXT:    zip2 v1.2s, v5.2s, v7.2s
-; CHECK-NEXT:    fmul v3.2s, v0.2s, v6.2s
-; CHECK-NEXT:    fmul v6.2s, v1.2s, v6.2s
-; CHECK-NEXT:    fmla v3.2s, v4.2s, v1.2s
-; CHECK-NEXT:    fneg v2.2s, v6.2s
+; CHECK-NEXT:    ext v2.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT:    zip1 v0.2s, v5.2s, v2.2s
+; CHECK-NEXT:    zip2 v2.2s, v5.2s, v2.2s
+; CHECK-NEXT:    fmul v3.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fmul v1.2s, v2.2s, v1.2s
+; CHECK-NEXT:    fmla v3.2s, v4.2s, v2.2s
+; CHECK-NEXT:    fneg v2.2s, v1.2s
 ; CHECK-NEXT:    fmla v2.2s, v4.2s, v0.2s
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    st2 { v2.2s, v3.2s }, [x5]
@@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, <
 ; CHECK-NEXT:    zip1 v3.2s, v3.2s, v17.2s
 ; CHECK-NEXT:    fmul v18.2s, v6.2s, v7.2s
 ; CHECK-NEXT:    fmul v5.2s, v19.2s, v16.2s
-; CHECK-NEXT:    fmul v16.2s, v2.2s, v16.2s
 ; CHECK-NEXT:    fmul v7.2s, v0.2s, v7.2s
+; CHECK-NEXT:    fmul v16.2s, v2.2s, v16.2s
 ; CHECK-NEXT:    fneg v4.2s, v18.2s
 ; CHECK-NEXT:    fmla v5.2s, v3.2s, v2.2s
-; CHECK-NEXT:    fneg v2.2s, v16.2s
 ; CHECK-NEXT:    fmla v7.2s, v1.2s, v6.2s
+; CHECK-NEXT:    fneg v2.2s, v16.2s
 ; CHECK-NEXT:    fmla v4.2s, v1.2s, v0.2s
-; CHECK-NEXT:    fmla v2.2s, v3.2s, v19.2s
 ; CHECK-NEXT:    fmul v0.2s, v7.2s, v5.2s
+; CHECK-NEXT:    fmla v2.2s, v3.2s, v19.2s
 ; CHECK-NEXT:    fmul v17.2s, v4.2s, v5.2s
 ; CHECK-NEXT:    str d4, [x0]
-; CHECK-NEXT:    fmla v17.2s, v2.2s, v7.2s
 ; CHECK-NEXT:    fneg v16.2s, v0.2s
 ; CHECK-NEXT:    zip1 v0.4s, v2.4s, v5.4s
+; CHECK-NEXT:    fmla v17.2s, v2.2s, v7.2s
 ; CHECK-NEXT:    fmla v16.2s, v2.2s, v4.2s
 ; CHECK-NEXT:    st2 { v16.2s, v17.2s }, [x1]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index dcc11609ca2316..5e0602789cb72b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -49,8 +49,6 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
@@ -149,8 +147,6 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -253,8 +249,6 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 8e26ef6b87ecca..2a51ded6a96761 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -42,8 +42,6 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -131,8 +129,6 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -225,8 +221,6 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
 ; CHECK-NEXT:    fadd z2.d, z2.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z1.d
 ; CHECK-NEXT:    faddv d1, p0, z2.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -332,14 +326,12 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
 ; CHECK-NEXT:    add z2.d, z5.d, z2.d
 ; CHECK-NEXT:    b.ne .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %middle.block
-; CHECK-NEXT:    uaddv d2, p0, z2.d
 ; CHECK-NEXT:    uzp2 z3.d, z1.d, z0.d
 ; CHECK-NEXT:    uzp1 z1.d, z1.d, z0.d
+; CHECK-NEXT:    uaddv d2, p0, z2.d
 ; CHECK-NEXT:    faddv d0, p0, z3.d
-; CHECK-NEXT:    fmov x8, d2
 ; CHECK-NEXT:    faddv d1, p0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
+; CHECK-NEXT:    fmov x8, d2
 ; CHECK-NEXT:    str w8, [x4]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
index b4425c0c01e177..0277a98cba7b9e 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
@@ -57,8 +57,6 @@ define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <v
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov z6.d, #0 // =0x0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d5 killed $d5 def $z5
-; CHECK-NEXT:    // kill: def $d4 killed $d4 def $z4
 ; CHECK-NEXT:    mov z5.d, d5
 ; CHECK-NEXT:    mov z4.d, d4
 ; CHECK-NEXT:    mov z24.d, z6.d

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
index ad9240b0922bda..83178a11499625 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
@@ -54,8 +54,6 @@ define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v7.2d, #0000000000000000
 ; CHECK-NEXT:    movi v6.2d, #0000000000000000
-; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-NEXT:    mov v4.d[1], v5.d[0]
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
 ; CHECK-NEXT:    fcmla v7.2d, v1.2d, v3.2d, #0

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
index 7686740aec3026..5c4782ec986c2c 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
@@ -201,84 +201,76 @@ entry:
 define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c) {
 ; CHECK-LABEL: abp90c12:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-NEXT:    mov v1.s[1], v3.s[0]
 ; CHECK-NEXT:    ldr s17, [sp, #40]
+; CHECK-NEXT:    ldr s3, [sp, #32]
 ; CHECK-NEXT:    add x10, sp, #56
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    add x9, sp, #48
-; CHECK-NEXT:    mov v1.s[1], v3.s[0]
-; CHECK-NEXT:    ldr s3, [sp, #32]
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
 ; CHECK-NEXT:    ld1 { v17.s }[1], [x10]
-; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    ldr s16, [sp, #8]
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    add x10, sp, #24
 ; CHECK-NEXT:    ld1 { v3.s }[1], [x9]
 ; CHECK-NEXT:    add x9, sp, #72
-; CHECK-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-NEXT:    ldr s16, [sp, #8]
+; CHECK-NEXT:    add x10, sp, #24
 ; CHECK-NEXT:    ldr s2, [sp]
-; CHECK-NEXT:    ld1 { v16.s }[1], [x10]
-; CHECK-NEXT:    add x10, sp, #112
-; CHECK-NEXT:    ldr s20, [sp, #136]
 ; CHECK-NEXT:    mov v1.s[2], v5.s[0]
+; CHECK-NEXT:    ldr s5, [sp, #96]
+; CHECK-NEXT:    ldr s20, [sp, #136]
 ; CHECK-NEXT:    ld1 { v17.s }[2], [x9]
 ; CHECK-NEXT:    add x9, sp, #64
-; CHECK-NEXT:    ldr s5, [sp, #96]
+; CHECK-NEXT:    ld1 { v16.s }[1], [x10]
 ; CHECK-NEXT:    ld1 { v3.s }[2], [x9]
+; CHECK-NEXT:    add x10, sp, #112
 ; CHECK-NEXT:    mov v0.s[2], v4.s[0]
 ; CHECK-NEXT:    add x9, sp, #88
 ; CHECK-NEXT:    ldr s4, [sp, #104]
-; CHECK-NEXT:    ldr s19, [sp, #192]
 ; CHECK-NEXT:    ld1 { v5.s }[1], [x10]
+; CHECK-NEXT:    mov v1.s[3], v7.s[0]
 ; CHECK-NEXT:    add x10, sp, #80
 ; CHECK-NEXT:    ld1 { v17.s }[3], [x9]
-; CHECK-NEXT:    mov v1.s[3], v7.s[0]
 ; CHECK-NEXT:    add x9, sp, #120
 ; CHECK-NEXT:    ld1 { v3.s }[3], [x10]
-; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
 ; CHECK-NEXT:    ldr s7, [sp, #128]
+; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
 ; CHECK-NEXT:    add x10, sp, #144
 ; CHECK-NEXT:    mov v0.s[3], v6.s[0]
-; CHECK-NEXT:    add x9, sp, #16
 ; CHECK-NEXT:    ld1 { v7.s }[1], [x10]
-; CHECK-NEXT:    ld1 { v2.s }[1], [x9]
-; CHECK-NEXT:    add x9, sp, #160
+; CHECK-NEXT:    add x9, sp, #16
+; CHECK-NEXT:    ldr s19, [sp, #192]
 ; CHECK-NEXT:    fmul v6.4s, v17.4s, v1.4s
+; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    ld1 { v2.s }[1], [x9]
 ; CHECK-NEXT:    fmul v18.4s, v4.4s, v16.4s
 ; CHECK-NEXT:    fmul v16.4s, v5.4s, v16.4s
-; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
-; CHECK-NEXT:    add x10, sp, #208
+; CHECK-NEXT:    add x9, sp, #160
 ; CHECK-NEXT:    ld1 { v7.s }[2], [x9]
 ; CHECK-NEXT:    add x9, sp, #152
-; CHECK-NEXT:    ld1 { v19.s }[1], [x10]
+; CHECK-NEXT:    add x10, sp, #208
 ; CHECK-NEXT:    ld1 { v20.s }[1], [x9]
 ; CHECK-NEXT:    add x9, sp, #176
-; CHECK-NEXT:    add x10, sp, #184
+; CHECK-NEXT:    ld1 { v19.s }[1], [x10]
 ; CHECK-NEXT:    fneg v6.4s, v6.4s
+; CHECK-NEXT:    fmla v1.4s, v0.4s, v17.4s
+; CHECK-NEXT:    add x10, sp, #184
 ; CHECK-NEXT:    fneg v18.4s, v18.4s
 ; CHECK-NEXT:    fmla v16.4s, v2.4s, v4.4s
-; CHECK-NEXT:    fmla v1.4s, v0.4s, v17.4s
 ; CHECK-NEXT:    ld1 { v7.s }[3], [x9]
 ; CHECK-NEXT:    add x9, sp, #168
-; CHECK-NEXT:    ld1 { v20.s }[2], [x9]
 ; CHECK-NEXT:    ldr s4, [sp, #200]
+; CHECK-NEXT:    ld1 { v20.s }[2], [x9]
 ; CHECK-NEXT:    add x9, sp, #216
 ; CHECK-NEXT:    fmla v6.4s, v0.4s, v3.4s
-; CHECK-NEXT:    fmla v18.4s, v2.4s, v5.4s
-; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
 ; CHECK-NEXT:    fsub v0.4s, v7.4s, v1.4s
+; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
+; CHECK-NEXT:    fmla v18.4s, v2.4s, v5.4s
 ; CHECK-NEXT:    fsub v1.4s, v19.4s, v16.4s
 ; CHECK-NEXT:    ld1 { v20.s }[3], [x10]
 ; CHECK-NEXT:    fadd v2.4s, v4.4s, v18.4s
 ; CHECK-NEXT:    fadd v3.4s, v20.4s, v6.4s
 ; CHECK-NEXT:    ext v4.16b, v0.16b, v1.16b, #12
 ; CHECK-NEXT:    ext v5.16b, v3.16b, v2.16b, #12
-; CHECK-NEXT:    trn2 v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    ext v4.16b, v0.16b, v4.16b, #12
+; CHECK-NEXT:    trn2 v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    ext v5.16b, v3.16b, v5.16b, #8
 ; CHECK-NEXT:    rev64 v4.4s, v4.4s
 ; CHECK-NEXT:    trn2 v2.4s, v4.4s, v5.4s

diff  --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 0daa6e7f16202a..8570b4d46d9733 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -10,15 +10,12 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
 ;
 ; CHECK-GI-LABEL: concat1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w9, v1.s[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    mov v0.h[3], w9
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i8> %v4i8
@@ -38,7 +35,6 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v8i8 = shufflevector <4 x i8> %A, <4 x i8> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x i8> %v8i8
@@ -47,8 +43,6 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 define <16 x i8> @concat3(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: concat3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v16i8 = shufflevector <8 x i8> %A, <8 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -69,7 +63,6 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4i16 = shufflevector <2 x i16> %A, <2 x i16> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i16> %v4i16
@@ -78,8 +71,6 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 define <8 x i16> @concat5(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: concat5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v8i16 = shufflevector <4 x i16> %A, <4 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -101,8 +92,6 @@ define <16 x i16> @concat6(ptr %A, ptr %B) {
 define <4 x i32> @concat7(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: concat7:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v4i32 = shufflevector <2 x i32> %A, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -133,7 +122,6 @@ define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x half> %v4half
@@ -142,8 +130,6 @@ define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
 ; CHECK-LABEL: concat10:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v8half= shufflevector <4 x half> %A, <4 x half> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -168,9 +154,9 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[0], v0.s[0]
 ; CHECK-GI-NEXT:    ret
     %a = load <2 x i16>, ptr %ptr
     %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -209,10 +195,6 @@ define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %p
 define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <4 x i8> %D) {
 ; CHECK-SD-LABEL: concat_v16s8_v4s8_reg:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -242,12 +224,8 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <
 define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %C, <2 x i16> %D) {
 ; CHECK-SD-LABEL: concat_v8s16_v2s16_reg:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
@@ -282,10 +260,9 @@ define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    ret
   %l1 = load <2 x i16>, ptr %p1
   %l2 = load <2 x i16>, ptr %p2
@@ -305,10 +282,9 @@ define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    ret
   %l1 = load <2 x i16>, ptr %p1
   %l2 = load <2 x i16>, ptr %p2

diff  --git a/llvm/test/CodeGen/AArch64/concatbinop.ll b/llvm/test/CodeGen/AArch64/concatbinop.ll
index 828182d18b38ce..ae0ded9db894aa 100644
--- a/llvm/test/CodeGen/AArch64/concatbinop.ll
+++ b/llvm/test/CodeGen/AArch64/concatbinop.ll
@@ -5,10 +5,6 @@
 define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_add:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
@@ -54,10 +50,6 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_sub:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
@@ -71,10 +63,6 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
@@ -88,10 +76,6 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_xor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
@@ -105,10 +89,6 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_fadd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
@@ -122,10 +102,6 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
 define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_fmul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.8h
@@ -139,10 +115,6 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
 define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_min:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h

diff  --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index 40684b0f3a256b..85abc8c8566b2e 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -76,7 +76,6 @@ entry:
 define bfloat @t7(bfloat %x)  {
 ; CHECK-LABEL: t7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -90,7 +89,6 @@ define bfloat @t7(bfloat %x)  {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi bfloat %x to i32
@@ -101,7 +99,6 @@ entry:
 define bfloat @t8(bfloat %x)  {
 ; CHECK-LABEL: t8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -115,7 +112,6 @@ define bfloat @t8(bfloat %x)  {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui bfloat %x to i32
@@ -198,7 +194,6 @@ entry:
 define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-LABEL: t7_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -212,7 +207,6 @@ define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
@@ -223,7 +217,6 @@ entry:
 define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-LABEL: t8_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -237,7 +230,6 @@ define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0

diff  --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
index 11f19b8c56965b..c32113f7090812 100644
--- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -6,12 +6,11 @@
 define void @signbits_vXi1(<4 x i16> %a1) {
 ; CHECK-LABEL: signbits_vXi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    mov w1, wzr
-; CHECK-NEXT:    mov w2, wzr
-; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    cmle v0.4h, v0.4h, #0
 ; CHECK-NEXT:    umov w0, v0.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index a2ebdd28b16b8f..75009e41a511d6 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -38,7 +38,6 @@ entry:
 define <2 x i8> @duplane0_v2i8(<2 x i8> %b) {
 ; CHECK-LABEL: duplane0_v2i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -133,7 +132,6 @@ define <4 x i8> @dup_v4i8(i8 %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    dup v0.8b, w0
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %b = insertelement <4 x i8> poison, i8 %a, i64 0
@@ -144,7 +142,6 @@ entry:
 define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
 ; CHECK-SD-LABEL: duplane0_v4i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -153,7 +150,6 @@ define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -171,7 +167,6 @@ define <4 x i8> @loaddup_v4i8(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i8, ptr %p
@@ -194,7 +189,6 @@ entry:
 define <8 x i8> @duplane0_v8i8(<8 x i8> %b) {
 ; CHECK-LABEL: duplane0_v8i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -299,7 +293,6 @@ define <2 x i16> @dup_v2i16(i16 %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    dup v0.4h, w0
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %b = insertelement <2 x i16> poison, i16 %a, i64 0
@@ -310,7 +303,6 @@ entry:
 define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
 ; CHECK-SD-LABEL: duplane0_v2i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -319,7 +311,6 @@ define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
@@ -337,7 +328,6 @@ define <2 x i16> @loaddup_v2i16(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.4h }, [x0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i16, ptr %p
@@ -360,7 +350,6 @@ entry:
 define <3 x i16> @duplane0_v3i16(<3 x i16> %b) {
 ; CHECK-LABEL: duplane0_v3i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -394,7 +383,6 @@ entry:
 define <4 x i16> @duplane0_v4i16(<4 x i16> %b) {
 ; CHECK-LABEL: duplane0_v4i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -503,7 +491,6 @@ entry:
 define <2 x i32> @duplane0_v2i32(<2 x i32> %b) {
 ; CHECK-LABEL: duplane0_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -694,12 +681,9 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
 ; CHECK-GI-LABEL: duplane0_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov d2, d0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -718,9 +702,7 @@ define <3 x i64> @loaddup_v3i64(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
 ; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i64, ptr %p
@@ -903,7 +885,6 @@ entry:
 define <2 x half> @dup_v2half(half %a) {
 ; CHECK-LABEL: dup_v2half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -915,7 +896,6 @@ entry:
 define <2 x half> @duplane0_v2half(<2 x half> %b) {
 ; CHECK-LABEL: duplane0_v2half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -938,7 +918,6 @@ entry:
 define <3 x half> @dup_v3half(half %a) {
 ; CHECK-LABEL: dup_v3half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -950,7 +929,6 @@ entry:
 define <3 x half> @duplane0_v3half(<3 x half> %b) {
 ; CHECK-LABEL: duplane0_v3half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -973,7 +951,6 @@ entry:
 define <4 x half> @dup_v4half(half %a) {
 ; CHECK-LABEL: dup_v4half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -985,7 +962,6 @@ entry:
 define <4 x half> @duplane0_v4half(<4 x half> %b) {
 ; CHECK-LABEL: duplane0_v4half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1008,7 +984,6 @@ entry:
 define <8 x half> @dup_v8half(half %a) {
 ; CHECK-LABEL: dup_v8half:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1042,14 +1017,12 @@ entry:
 define <16 x half> @dup_v16half(half %a) {
 ; CHECK-SD-LABEL: dup_v16half:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v16half:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1093,7 +1066,6 @@ entry:
 define <2 x bfloat> @dup_v2bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v2bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1105,7 +1077,6 @@ entry:
 define <2 x bfloat> @duplane0_v2bfloat(<2 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v2bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1128,7 +1099,6 @@ entry:
 define <3 x bfloat> @dup_v3bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v3bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1140,7 +1110,6 @@ entry:
 define <3 x bfloat> @duplane0_v3bfloat(<3 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v3bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1163,7 +1132,6 @@ entry:
 define <4 x bfloat> @dup_v4bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v4bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1175,7 +1143,6 @@ entry:
 define <4 x bfloat> @duplane0_v4bfloat(<4 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v4bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1198,7 +1165,6 @@ entry:
 define <8 x bfloat> @dup_v8bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v8bfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1232,14 +1198,12 @@ entry:
 define <16 x bfloat> @dup_v16bfloat(bfloat %a) {
 ; CHECK-SD-LABEL: dup_v16bfloat:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v16bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1283,7 +1247,6 @@ entry:
 define <2 x float> @dup_v2float(float %a) {
 ; CHECK-LABEL: dup_v2float:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1295,7 +1258,6 @@ entry:
 define <2 x float> @duplane0_v2float(<2 x float> %b) {
 ; CHECK-LABEL: duplane0_v2float:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1318,7 +1280,6 @@ entry:
 define <3 x float> @dup_v3float(float %a) {
 ; CHECK-LABEL: dup_v3float:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1352,7 +1313,6 @@ entry:
 define <4 x float> @dup_v4float(float %a) {
 ; CHECK-LABEL: dup_v4float:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1386,14 +1346,12 @@ entry:
 define <8 x float> @dup_v8float(float %a) {
 ; CHECK-SD-LABEL: dup_v8float:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v8float:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    dup v2.4s, v0.s[0]
 ; CHECK-GI-NEXT:    dup v1.4s, v0.s[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1437,7 +1395,6 @@ entry:
 define <2 x double> @dup_v2double(double %a) {
 ; CHECK-LABEL: dup_v2double:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1477,10 +1434,8 @@ define <3 x double> @dup_v3double(double %a) {
 ;
 ; CHECK-GI-LABEL: dup_v3double:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    dup v3.2d, v0.d[0]
 ; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -1500,12 +1455,9 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
 ; CHECK-GI-LABEL: duplane0_v3double:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov d2, d0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
@@ -1524,9 +1476,7 @@ define <3 x double> @loaddup_v3double(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
 ; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load double, ptr %p
@@ -1538,14 +1488,12 @@ entry:
 define <4 x double> @dup_v4double(double %a) {
 ; CHECK-SD-LABEL: dup_v4double:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v4double:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
 ; CHECK-GI-NEXT:    dup v1.2d, v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b

diff  --git a/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll b/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
index 420cf93a023d70..6aa375dc5e4d40 100644
--- a/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
+++ b/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
@@ -100,7 +100,6 @@ define <4 x i64> @sel.v4i32.umull(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
 define <4 x i32> @sel.v4i32.sqdmull(<8 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: sel.v4i32.sqdmull:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v2.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    sqdmlal2 v2.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    mov v0.16b, v2.16b

diff  --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 177f2cafcf833a..0765377417d5bf 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -9,7 +9,6 @@
 define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -20,7 +19,6 @@ define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -31,7 +29,6 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-SD-LABEL: i8_off8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_off8:
@@ -47,7 +44,6 @@ define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off15:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
@@ -58,13 +54,11 @@ define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-SD-LABEL: i8_off22:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v1.16b, v1.16b, #6
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_off22:
 ; CHECK-GISEL:       // %bb.0: // %entry
 ; CHECK-GISEL-NEXT:    ext v0.16b, v1.16b, v0.16b, #6
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
@@ -75,7 +69,6 @@ entry:
 define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -86,7 +79,6 @@ define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -97,7 +89,6 @@ define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off7:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
@@ -108,7 +99,6 @@ define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -119,7 +109,6 @@ entry:
 define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
@@ -130,7 +119,6 @@ define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
@@ -141,7 +129,6 @@ define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
@@ -152,7 +139,6 @@ define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off4:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
@@ -163,7 +149,6 @@ entry:
 define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
@@ -174,7 +159,6 @@ define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-SD-LABEL: i64_off1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i64_off1:
@@ -190,7 +174,6 @@ define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
@@ -201,7 +184,6 @@ entry:
 define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -212,7 +194,6 @@ define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -223,7 +204,6 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
 ; CHECK-SD-LABEL: i8_zero_off8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_zero_off8:
@@ -240,7 +220,6 @@ define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
@@ -257,7 +236,6 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
 ; CHECK-GISEL:       // %bb.0: // %entry
 ; CHECK-GISEL-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
-; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
@@ -268,7 +246,6 @@ entry:
 define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -279,7 +256,6 @@ define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -291,7 +267,6 @@ define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
@@ -312,7 +287,6 @@ entry:
 define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
@@ -323,7 +297,6 @@ define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
@@ -335,7 +308,6 @@ define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
@@ -356,7 +328,6 @@ entry:
 define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
@@ -367,7 +338,6 @@ define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
 ; CHECK-SD-LABEL: i64_zero_off1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i64_zero_off1:

diff  --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll
index 72f4d58a425e78..8f3a42cfd1e12d 100644
--- a/llvm/test/CodeGen/AArch64/extbinopload.ll
+++ b/llvm/test/CodeGen/AArch64/extbinopload.ll
@@ -6,7 +6,6 @@ define <4 x i16> @normal_load_v4i8(ptr %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp s0, s1, [x0]
 ; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4
@@ -39,7 +38,6 @@ define <4 x i16> @load_v4i8(ptr %p) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    shl v0.4h, v0.4h, #3
 ; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4

diff  --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index b87157a183835d..3f18e756b5c785 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -160,8 +160,6 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; CHECK-LABEL: bextr64_a1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -199,8 +197,6 @@ define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    mov w9, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x9, x9, x2
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    sub x9, x9, #1
@@ -398,8 +394,6 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; CHECK-LABEL: bextr64_b1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    bic x0, x9, x8
@@ -435,8 +429,6 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x9, x9, x2
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    bic x0, x8, x9
@@ -473,7 +465,6 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -492,7 +483,6 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -512,7 +502,6 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -552,7 +541,6 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w10, w0, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w8, w9, w8
@@ -592,7 +580,6 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w9, w10, w9
 ; CHECK-NEXT:    and w0, w9, w8
@@ -646,7 +633,6 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #64 // =0x40
 ; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x10, x0, x1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr x8, x9, x8
@@ -686,7 +672,6 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    lsr x9, x10, x9
 ; CHECK-NEXT:    and x0, x9, x8
@@ -797,7 +782,6 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w0, w1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
@@ -834,7 +818,6 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr32_d3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr w8, w8, w1
@@ -871,7 +854,6 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_d1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x8, x0, x1
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    sub w9, w9, w2
@@ -908,7 +890,6 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr64_d3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr x8, x8, x1
@@ -935,7 +916,6 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; CHECK-NEXT:    neg x9, x2
 ; CHECK-NEXT:    lsl x8, x8, x9
 ; CHECK-NEXT:    lsr x0, x8, x9
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits

diff  --git a/llvm/test/CodeGen/AArch64/extract-insert.ll b/llvm/test/CodeGen/AArch64/extract-insert.ll
index 8c133d76ce3173..994d6ee1ca6038 100644
--- a/llvm/test/CodeGen/AArch64/extract-insert.ll
+++ b/llvm/test/CodeGen/AArch64/extract-insert.ll
@@ -6,12 +6,10 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i32_le:
 ; BE:       // %bb.0:
 ; BE-NEXT:    lsr x0, x0, #32
-; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i32_le:
 ; LE:       // %bb.0:
-; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <4 x i32>
@@ -22,13 +20,11 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
 define i32 @trunc_i64_to_i32_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i32_be:
 ; BE:       // %bb.0:
-; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i32_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #32
-; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <4 x i32>
@@ -40,12 +36,10 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i16_le:
 ; BE:       // %bb.0:
 ; BE-NEXT:    lsr x0, x0, #48
-; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i16_le:
 ; LE:       // %bb.0:
-; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <8 x i16>
@@ -56,13 +50,11 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
 define i16 @trunc_i64_to_i16_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i16_be:
 ; BE:       // %bb.0:
-; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i16_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #48
-; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <8 x i16>
@@ -105,13 +97,11 @@ define i8 @trunc_i32_to_i8_be(i32 %x) {
 define i8 @trunc_i64_to_i8_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i8_be:
 ; BE:       // %bb.0:
-; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i8_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #56
-; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <3 x i64> undef, i64 %x, i32 0
   %bc = bitcast <3 x i64> %ins to <24 x i8>

diff  --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
index 4b8f3e86b5fefe..c1fcc994d29b58 100644
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -130,7 +130,6 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    and x0, x8, x0
@@ -162,7 +161,6 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -284,7 +282,6 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -314,7 +311,6 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x9, x8

diff  --git a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
index ecb76d9320a288..82979810a350bf 100644
--- a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -48,13 +48,11 @@ define i32 @extract_v4i32(<4 x i32> %x, i32 %y) {
 define i32 @extract_v2i32(<2 x i32> %x, i32 %y) {
 ; CHECK-ISEL-LABEL: extract_v2i32:
 ; CHECK-ISEL:       // %bb.0:
-; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
 ; CHECK-ISEL-NEXT:    ret
 ;
 ; CHECK-GLOBAL-LABEL: extract_v2i32:
 ; CHECK-GLOBAL:       // %bb.0:
-; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GLOBAL-NEXT:    mov s0, v0.s[1]
 ; CHECK-GLOBAL-NEXT:    fmov w0, s0
 ; CHECK-GLOBAL-NEXT:    ret
@@ -74,7 +72,6 @@ define i16 @extract_v8i16(<8 x i16> %x, i32 %y) {
 define i16 @extract_v4i16(<4 x i16> %x, i32 %y) {
 ; CHECK-LABEL: extract_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[1]
 ; CHECK-NEXT:    ret
   %ext = extractelement <4 x i16> %x, i32 1
@@ -93,7 +90,6 @@ define i8 @extract_v16i8(<16 x i8> %x, i32 %y) {
 define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
 ; CHECK-LABEL: extract_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[1]
 ; CHECK-NEXT:    ret
   %ext = extractelement <8 x i8> %x, i32 1
@@ -104,7 +100,6 @@ define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
 define i64 @sv2i32i64(<2 x i32> %x) {
 ; CHECK-LABEL: sv2i32i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.s[1]
 ; CHECK-NEXT:    ret
   %e = extractelement <2 x i32> %x, i64 1
@@ -125,7 +120,6 @@ define i64 @sv4i32i64(<4 x i32> %x) {
 define i64 @sv4i16i64(<4 x i16> %x) {
 ; CHECK-LABEL: sv4i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -146,7 +140,6 @@ define i64 @sv8i16i64(<8 x i16> %x) {
 define i64 @sv8i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -177,7 +170,6 @@ define i32 @sv8i16i32(<8 x i16> %x) {
 define i32 @sv4i16i32(<4 x i16> %x) {
 ; CHECK-LABEL: sv4i16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -198,7 +190,6 @@ define i32 @sv16i8i32(<16 x i8> %x) {
 define i32 @sv8i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -219,7 +210,6 @@ define i16 @sv16i8i16(<16 x i8> %x) {
 define i16 @sv8i8i16(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -232,7 +222,6 @@ define i16 @sv8i8i16(<8 x i8> %x) {
 define i64 @zv2i32i64(<2 x i32> %x) {
 ; CHECK-LABEL: zv2i32i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w0, v0.s[1]
 ; CHECK-NEXT:    ret
   %e = extractelement <2 x i32> %x, i64 1
@@ -253,7 +242,6 @@ define i64 @zv4i32i64(<4 x i32> %x) {
 define i64 @zv4i16i64(<4 x i16> %x) {
 ; CHECK-LABEL: zv4i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -274,7 +262,6 @@ define i64 @zv8i16i64(<8 x i16> %x) {
 define i64 @zv8i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -305,7 +292,6 @@ define i32 @zv8i16i32(<8 x i16> %x) {
 define i32 @zv4i16i32(<4 x i16> %x) {
 ; CHECK-LABEL: zv4i16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -326,7 +312,6 @@ define i32 @zv16i8i32(<16 x i8> %x) {
 define i32 @zv8i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -347,7 +332,6 @@ define i16 @zv16i8i16(<16 x i8> %x) {
 define i16 @zv8i8i16(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -386,7 +370,6 @@ define i32 @redundant_i16i32(<8 x i16> %x) {
 define i32 @both_i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: both_i8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[2]
 ; CHECK-NEXT:    smov w9, v0.b[2]
 ; CHECK-NEXT:    eor w0, w8, w9
@@ -401,7 +384,6 @@ define i32 @both_i8i32(<8 x i8> %x) {
 define i32 @redundant_i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: redundant_i8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[2]
 ; CHECK-NEXT:    eor w0, w8, w8, lsl #24
 ; CHECK-NEXT:    ret
@@ -469,7 +451,6 @@ define i64 @redundant_i16i64(<8 x i16> %x) {
 define i64 @both_i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: both_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[2]
 ; CHECK-NEXT:    smov x9, v0.b[2]
 ; CHECK-NEXT:    eor x0, x8, x9
@@ -484,7 +465,6 @@ define i64 @both_i8i64(<8 x i8> %x) {
 define i64 @redundant_i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: redundant_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x8, v0.b[2]
 ; CHECK-NEXT:    eor x0, x8, x8, lsl #56
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
index 75d55773b3681e..9e4a3b9272e6e3 100644
--- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
+++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
@@ -6,7 +6,6 @@ define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind {
 ; CHECK-SD-LABEL: and_extract_zext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_zext_idx0:
@@ -25,7 +24,6 @@ define <4 x i16> @and_extract_sext_idx0(<8 x i8> %vec) nounwind {
 ; CHECK-SD-LABEL: and_extract_sext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_sext_idx0:
@@ -45,7 +43,6 @@ define <2 x i32> @and_extract_zext_idx2(<4 x i16> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_zext_idx2:
@@ -66,7 +63,6 @@ define <4 x i16> @and_extract_sext_idx4(<8 x i8> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_sext_idx4:
@@ -86,7 +82,6 @@ define <2 x i32> @sext_extract_zext_idx0(<4 x i16> %vec) nounwind {
 ; CHECK-SD-LABEL: sext_extract_zext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_zext_idx0:
@@ -121,7 +116,6 @@ define <4 x i16> @sext_extract_sext_idx0(<8 x i8> %vec) nounwind {
 ; CHECK-SD-LABEL: sext_extract_sext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_sext_idx0:
@@ -142,7 +136,6 @@ define <2 x i32> @sext_extract_zext_idx2(<4 x i16> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_zext_idx2:
@@ -164,7 +157,6 @@ define <4 x i16> @sext_extract_sext_idx4(<8 x i8> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_sext_idx4:
@@ -184,7 +176,6 @@ define <4 x i16> @sext_extract_sext_idx4(<8 x i8> %vec) nounwind {
 define <8 x i8> @sext_extract_idx(<16 x i8> %vec) nounwind {
 ; CHECK-LABEL: sext_extract_idx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %extract = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %vec, i64 0)
   ret <8 x i8> %extract

diff  --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 12bd2db2297d77..eeae76f6b0f6e7 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -188,7 +188,6 @@ define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movi v1.4s, #127
 ; CHECK-NEXT:    add x8, sp, #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s

diff  --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 5e5fdd6d317057..5adc225168198c 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -39,7 +39,6 @@ define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -77,7 +76,6 @@ define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -144,7 +142,6 @@ define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
@@ -179,7 +176,6 @@ define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -211,7 +207,6 @@ define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -243,7 +238,6 @@ define double @extract_v2double_fneg(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -274,7 +268,6 @@ define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -311,7 +304,6 @@ define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -342,7 +334,6 @@ define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b,
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -393,7 +384,6 @@ define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -452,7 +442,6 @@ define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-SD-NEXT:    movi v2.4s, #1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -522,7 +511,6 @@ define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) {
 ; CHECK-SD-NEXT:    movi v1.4s, #1
 ; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; CHECK-SD-NEXT:    str q0, [sp]
@@ -588,7 +576,6 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
 ; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -658,7 +645,6 @@ define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    frintp v0.4s, v0.4s
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    frintm v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
@@ -729,7 +715,6 @@ define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -772,9 +757,7 @@ define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    str q1, [sp]
@@ -790,7 +773,6 @@ entry:
 define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-LABEL: extract_v4i32_vector_insert_const:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov w0, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -805,7 +787,6 @@ define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    ldr w0, [x8]
@@ -831,7 +812,6 @@ entry:
 define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
 ; CHECK-SD-LABEL: extract_v4i32_load:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    ldr w0, [x1, x8, lsl #2]
 ; CHECK-SD-NEXT:    ret
@@ -865,7 +845,6 @@ define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -892,7 +871,6 @@ entry:
 define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i32_bitcast_const:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vector = bitcast <4 x i32> %a to <2 x double>
@@ -907,7 +885,6 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp]
@@ -920,10 +897,8 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-GI-NEXT:    sub sp, sp, #16
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov x9, sp
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov w8, w0
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
@@ -955,7 +930,6 @@ define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    movi v0.4s, #11
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -996,7 +970,6 @@ define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %m
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr w0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index 5460a376931a55..e091b2a0a3ba6d 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -211,11 +211,8 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
 ; CHECK-CVT-SD-LABEL: test_select:
 ; CHECK-CVT-SD:       // %bb.0:
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-SD-NEXT:    cmp w0, #0
-; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select:
@@ -226,26 +223,20 @@ define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
 ;
 ; CHECK-CVT-GI-LABEL: test_select:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    tst w0, #0x1
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select:
 ; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    tst w0, #0x1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %r = select i1 %c, half %a, half %b
   ret half %r
@@ -256,11 +247,8 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcvt s3, h3
 ; CHECK-CVT-SD-NEXT:    fcvt s2, h2
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcmp s2, s3
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select_cc:
@@ -273,26 +261,20 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt s2, h2
 ; CHECK-CVT-GI-NEXT:    fcvt s3, h3
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    fcmp s2, s3
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select_cc:
 ; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fcmp h2, h3
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %cc = fcmp une half %c, %d
   %r = select i1 %cc, half %a, half %b
@@ -330,10 +312,7 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
 ; CHECK-CVT-SD-LABEL: test_select_cc_f16_f32:
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcmp s2, s3
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select_cc_f16_f32:
@@ -344,26 +323,20 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
 ;
 ; CHECK-CVT-GI-LABEL: test_select_cc_f16_f32:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fcmp s2, s3
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select_cc_f16_f32:
 ; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fcmp s2, s3
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %cc = fcmp une float %c, %d
   %r = select i1 %cc, half %a, half %b
@@ -761,7 +734,6 @@ define i1 @test_fcmp_ord(half %a, half %b) #0 {
 define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-SD-LABEL: test_fccmp:
 ; CHECK-CVT-SD:       // %bb.0:
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-SD-NEXT:    fcvt s1, h0
 ; CHECK-CVT-SD-NEXT:    fmov s2, #5.00000000
 ; CHECK-CVT-SD-NEXT:    adrp x8, .LCPI29_0
@@ -787,7 +759,6 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mov w8, #17664 // =0x4500
 ; CHECK-CVT-GI-NEXT:    mov w9, #18432 // =0x4800
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-GI-NEXT:    fcvt s2, h0
 ; CHECK-CVT-GI-NEXT:    fmov s1, w8
 ; CHECK-CVT-GI-NEXT:    fmov s3, w9
@@ -803,7 +774,6 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-FP16-GI-LABEL: test_fccmp:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fmov h1, #5.00000000
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-FP16-GI-NEXT:    fmov h2, #8.00000000
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fcmp h0, h1
@@ -1089,7 +1059,6 @@ define double @test_fpext_double(half %a) #0 {
 define i16 @test_bitcast_halftoi16(half %a) #0 {
 ; CHECK-LABEL: test_bitcast_halftoi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %r = bitcast half %a to i16
@@ -1100,7 +1069,6 @@ define half @test_bitcast_i16tohalf(i16 %a) #0 {
 ; CHECK-LABEL: test_bitcast_i16tohalf:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = bitcast i16 %a to half
   ret half %r
@@ -1400,11 +1368,9 @@ define half @test_fma(half %a, half %b, half %c) #0 {
 define half @test_fabs(half %a) #0 {
 ; CHECK-CVT-LABEL: test_fabs:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    and w8, w8, #0x7fff
 ; CHECK-CVT-NEXT:    fmov s0, w8
-; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_fabs:
@@ -1478,28 +1444,19 @@ define half @test_copysign(half %a, half %b) #0 {
 ; CHECK-FP16-SD-LABEL: test_copysign:
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP16-SD-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
-; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
-; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %r = call half @llvm.copysign.f16(half %a, half %b)
   ret half %r
@@ -1510,7 +1467,6 @@ define half @test_copysign_f32(half %a, float %b) #0 {
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcvt s0, h0
 ; CHECK-CVT-SD-NEXT:    mvni v2.4s, #128, lsl #24
-; CHECK-CVT-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-CVT-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-CVT-SD-NEXT:    fcvt h0, s0
 ; CHECK-CVT-SD-NEXT:    ret
@@ -1519,27 +1475,21 @@ define half @test_copysign_f32(half %a, float %b) #0 {
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign_f32:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt h1, s1
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign_f32:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fcvt h1, s1
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %tb = fptrunc float %b to half
   %r = call half @llvm.copysign.f16(half %a, half %tb)
@@ -1560,27 +1510,21 @@ define half @test_copysign_f64(half %a, double %b) #0 {
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    fcvt h1, d1
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign_f64:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt h1, d1
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign_f64:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fcvt h1, d1
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %tb = fptrunc double %b to half
   %r = call half @llvm.copysign.f16(half %a, half %tb)
@@ -1597,14 +1541,11 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-CVT-SD-NEXT:    fcvt s0, h0
 ; CHECK-CVT-SD-NEXT:    mvni v2.4s, #128, lsl #24
 ; CHECK-CVT-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-CVT-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_copysign_extended:
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
-; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP16-SD-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-FP16-SD-NEXT:    fcvt s0, h0
 ; CHECK-FP16-SD-NEXT:    ret
@@ -1612,8 +1553,6 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-CVT-GI-LABEL: test_copysign_extended:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
-; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-CVT-GI-NEXT:    fcvt s0, h0
 ; CHECK-CVT-GI-NEXT:    ret
@@ -1621,8 +1560,6 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-FP16-GI-LABEL: test_copysign_extended:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
-; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-FP16-GI-NEXT:    fcvt s0, h0
 ; CHECK-FP16-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 903aa8adf70851..17b75f89b32dab 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-LABEL: fabs_v4f128:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov v7.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
 ; CHECK-GI-NEXT:    mov x9, v1.d[1]
 ; CHECK-GI-NEXT:    mov x10, v2.d[1]
 ; CHECK-GI-NEXT:    mov x11, v3.d[1]
@@ -152,14 +152,13 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
-; CHECK-GI-NEXT:    mov v7.d[1], x8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    and x8, x9, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x9, x10, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x10, x11, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    mov v1.d[1], x8
 ; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    mov v3.d[1], x10
-; CHECK-GI-NEXT:    mov v0.16b, v7.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)

diff  --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll
index 43e90070736345..90b701675ad4f2 100644
--- a/llvm/test/CodeGen/AArch64/fabs.ll
+++ b/llvm/test/CodeGen/AArch64/fabs.ll
@@ -27,11 +27,9 @@ entry:
 define half @fabs_f16(half %a) {
 ; CHECK-SD-NOFP16-LABEL: fabs_f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-SD-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-SD-NOFP16-NEXT:    and w8, w8, #0x7fff
 ; CHECK-SD-NOFP16-NEXT:    fmov s0, w8
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fabs_f16:
@@ -41,11 +39,9 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-GI-NOFP16-NEXT:    and w8, w8, #0x7fff
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -70,27 +66,18 @@ entry:
 define <3 x double> @fabs_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fabs_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fabs v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fabs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fabs_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    fabs d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fabs d2, d2
 ; CHECK-GI-NEXT:    fabs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.fabs.v3f64(<3 x double> %a)

diff  --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll
index 447476e76ec64a..88676db854f4a3 100644
--- a/llvm/test/CodeGen/AArch64/faddp-half.ll
+++ b/llvm/test/CodeGen/AArch64/faddp-half.ll
@@ -5,19 +5,16 @@
 define half @faddp_2xhalf(<2 x half> %a) {
 ; CHECK-LABEL: faddp_2xhalf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_2xhalf:
 ; CHECKNOFP16:       // %bb.0: // %entry
-; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
@@ -29,19 +26,16 @@ entry:
 define half @faddp_2xhalf_commute(<2 x half> %a) {
 ; CHECK-LABEL: faddp_2xhalf_commute:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
 ; CHECKNOFP16:       // %bb.0: // %entry
-; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
@@ -53,19 +47,16 @@ entry:
 define half @faddp_4xhalf(<4 x half> %a) {
 ; CHECK-LABEL: faddp_4xhalf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_4xhalf:
 ; CHECKNOFP16:       // %bb.0: // %entry
-; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -77,19 +68,16 @@ entry:
 define half @faddp_4xhalf_commute(<4 x half> %a) {
 ; CHECK-LABEL: faddp_4xhalf_commute:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
 ; CHECKNOFP16:       // %bb.0: // %entry
-; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -115,7 +103,6 @@ define half @faddp_8xhalf(<8 x half> %a) {
 ; CHECKNOFP16-NEXT:    fadd v1.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
 ; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -141,7 +128,6 @@ define half @faddp_8xhalf_commute(<8 x half> %a) {
 ; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
 ; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
-; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git a/llvm/test/CodeGen/AArch64/faddp.ll b/llvm/test/CodeGen/AArch64/faddp.ll
index d0c8f6e26d4d6e..f86b3994c0f908 100644
--- a/llvm/test/CodeGen/AArch64/faddp.ll
+++ b/llvm/test/CodeGen/AArch64/faddp.ll
@@ -4,7 +4,6 @@
 define float @faddp_2xfloat(<2 x float> %a) {
 ; CHECK-LABEL: faddp_2xfloat:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -41,7 +40,6 @@ entry:
 define float @faddp_2xfloat_commute(<2 x float> %a) {
 ; CHECK-LABEL: faddp_2xfloat_commute:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -104,7 +102,6 @@ entry:
 define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
 ; CHECK-LABEL: faddp_2xfloat_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -141,7 +138,6 @@ entry:
 define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
 ; CHECK-LABEL: faddp_2xfloat_commute_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -278,7 +274,6 @@ define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    faddp v0.8h, v0.8h, v0.8h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %1 = fadd <8 x half> %a, %b
   %2 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

diff  --git a/llvm/test/CodeGen/AArch64/faddsub.ll b/llvm/test/CodeGen/AArch64/faddsub.ll
index b15579199a0598..cf7eda1d3e6b76 100644
--- a/llvm/test/CodeGen/AArch64/faddsub.ll
+++ b/llvm/test/CodeGen/AArch64/faddsub.ll
@@ -68,34 +68,20 @@ entry:
 define <3 x double> @fadd_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fadd_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fadd v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fadd v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fadd_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fadd d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fadd d2, d2, d5
 ; CHECK-GI-NEXT:    fadd v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fadd <3 x double> %a, %b
@@ -409,34 +395,20 @@ entry:
 define <3 x double> @fsub_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fsub_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fsub v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fsub v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fsub_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fsub d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fsub d2, d2, d5
 ; CHECK-GI-NEXT:    fsub v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fsub <3 x double> %a, %b

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index 5e14e63a23d3ca..182cdee6459dc6 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -734,7 +734,6 @@ define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
 define i64 @load_sext_shift_offreg_imm1(i32 %a) {
 ; CHECK-LABEL: load_sext_shift_offreg_imm1:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #3, #32
 ; CHECK-NEXT:    ldr x0, [x8, #8]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
index 3dc4771eb01c15..eae90015773004 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -53,7 +53,6 @@ define ptr @test_array4(ptr %a) {
 define ptr @test_array5(ptr %a, i32 %i) {
 ; CHECK-LABEL: test_array5:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    mov x9, #4 ; =0x4
 ; CHECK-NEXT:    madd x0, x8, x9, x0

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
index 76f81719b4547e..750be0b19f2c70 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -55,7 +55,6 @@ define i64 @asr_zext_i1_i64(i1 %b) {
 define i64 @asr_sext_i1_i64(i1 %b) {
 ; CHECK-LABEL: asr_sext_i1_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x0, x0, #0, #1
 ; CHECK-NEXT:    ret
   %1 = sext i1 %b to i64
@@ -161,7 +160,6 @@ define i32 @lsl_sext_i1_i32(i1 %b) {
 define i64 @lsl_zext_i1_i64(i1 %b) {
 ; CHECK-LABEL: lsl_zext_i1_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #4, #1
 ; CHECK-NEXT:    ret
   %1 = zext i1 %b to i64
@@ -172,7 +170,6 @@ define i64 @lsl_zext_i1_i64(i1 %b) {
 define i64 @lsl_sext_i1_i64(i1 %b) {
 ; CHECK-LABEL: lsl_sext_i1_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #4, #1
 ; CHECK-NEXT:    ret
   %1 = sext i1 %b to i64
@@ -247,7 +244,6 @@ define i32 @lsl_sext_i8_i32(i8 %b) {
 define i64 @lsl_zext_i8_i64(i8 %b) {
 ; CHECK-LABEL: lsl_zext_i8_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #4, #8
 ; CHECK-NEXT:    ret
   %1 = zext i8 %b to i64
@@ -258,7 +254,6 @@ define i64 @lsl_zext_i8_i64(i8 %b) {
 define i64 @lsl_sext_i8_i64(i8 %b) {
 ; CHECK-LABEL: lsl_sext_i8_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #4, #8
 ; CHECK-NEXT:    ret
   %1 = sext i8 %b to i64
@@ -311,7 +306,6 @@ define i32 @lsl_sext_i16_i32(i16 %b) {
 define i64 @lsl_zext_i16_i64(i16 %b) {
 ; CHECK-LABEL: lsl_zext_i16_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #8, #16
 ; CHECK-NEXT:    ret
   %1 = zext i16 %b to i64
@@ -322,7 +316,6 @@ define i64 @lsl_zext_i16_i64(i16 %b) {
 define i64 @lsl_sext_i16_i64(i16 %b) {
 ; CHECK-LABEL: lsl_sext_i16_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #8, #16
 ; CHECK-NEXT:    ret
   %1 = sext i16 %b to i64
@@ -351,7 +344,6 @@ define zeroext i32 @lsl_i32(i32 %a) {
 define i64 @lsl_zext_i32_i64(i32 %b) {
 ; CHECK-LABEL: lsl_zext_i32_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #16, #32
 ; CHECK-NEXT:    ret
   %1 = zext i32 %b to i64
@@ -362,7 +354,6 @@ define i64 @lsl_zext_i32_i64(i32 %b) {
 define i64 @lsl_sext_i32_i64(i32 %b) {
 ; CHECK-LABEL: lsl_sext_i32_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #16, #32
 ; CHECK-NEXT:    ret
   %1 = sext i32 %b to i64
@@ -680,7 +671,6 @@ define i32 @ashr_zero(i32 %a) {
 define i64 @shl_zext_zero(i32 %a) {
 ; CHECK-LABEL: shl_zext_zero:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64
@@ -691,7 +681,6 @@ define i64 @shl_zext_zero(i32 %a) {
 define i64 @lshr_zext_zero(i32 %a) {
 ; CHECK-LABEL: lshr_zext_zero:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64
@@ -702,7 +691,6 @@ define i64 @lshr_zext_zero(i32 %a) {
 define i64 @ashr_zext_zero(i32 %a) {
 ; CHECK-LABEL: ashr_zext_zero:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64

diff  --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 66f26fc9d85973..ac1450bef32102 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -247,7 +247,6 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
 ; CHECK-SD-NOFP16-NEXT:    cmp w0, #0
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    fcsel s0, s9, s8, lt
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -285,7 +284,6 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    csel w8, w8, w9, lt
 ; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -347,11 +345,8 @@ define half @f16_half(half %a, half %b, half %d, half %e) {
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h3 killed $h3 def $s3
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-SD-NOFP16-NEXT:    fcmp s0, s1
 ; CHECK-SD-NOFP16-NEXT:    fcsel s0, s2, s3, mi
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: f16_half:
@@ -364,26 +359,20 @@ define half @f16_half(half %a, half %b, half %d, half %e) {
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h2 killed $h2 def $s2
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h3 killed $h3 def $s3
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s2
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s3
 ; CHECK-GI-NOFP16-NEXT:    fcmp s0, s1
 ; CHECK-GI-NOFP16-NEXT:    csel w8, w8, w9, mi
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: f16_half:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $h2 killed $h2 def $s2
-; CHECK-GI-FP16-NEXT:    // kill: def $h3 killed $h3 def $s3
 ; CHECK-GI-FP16-NEXT:    fcmp h0, h1
 ; CHECK-GI-FP16-NEXT:    fmov w8, s2
 ; CHECK-GI-FP16-NEXT:    fmov w9, s3
 ; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, mi
 ; CHECK-GI-FP16-NEXT:    fmov s0, w8
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fcmp olt half %a, %b
@@ -679,12 +668,10 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 160
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    stp q2, q5, [sp, #112] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-SD-NEXT:    ldr d5, [sp, #184]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    str q3, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp d3, d2, [sp, #168]
-; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    mov v1.16b, v4.16b
@@ -722,9 +709,6 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-SD-NEXT:    fmov d2, x8
 ; CHECK-SD-NEXT:    bsl v2.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #160
 ; CHECK-SD-NEXT:    ret
 ;
@@ -746,8 +730,6 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-GI-NEXT:    stp q5, q2, [sp, #32] // 32-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr d2, [sp, #184]
 ; CHECK-GI-NEXT:    ldr x20, [sp, #200]
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    str q7, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    str q2, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr d2, [sp, #192]
@@ -785,7 +767,6 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #176
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -809,57 +790,39 @@ entry:
 define <3 x double> @v3f64_double(<3 x double> %a, <3 x double> %b, <3 x double> %d, <3 x double> %e) {
 ; CHECK-SD-LABEL: v3f64_double:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    ldr d16, [sp, #24]
-; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    ldr d16, [sp, #24]
 ; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fcmgt v2.2d, v5.2d, v2.2d
+; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    bsl v2.16b, v17.16b, v16.16b
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v3f64_double:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-GI-NEXT:    fcmp d2, d5
-; CHECK-GI-NEXT:    ldr x8, [sp]
-; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT:    ldr x8, [sp]
 ; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
-; CHECK-GI-NEXT:    cset w9, mi
-; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT:    fcmp d2, d5
+; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-GI-NEXT:    cset w9, mi
+; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-GI-NEXT:    and x8, x8, x9
 ; CHECK-GI-NEXT:    bic x9, x10, x9
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fcmp olt <3 x double> %a, %b
@@ -905,12 +868,6 @@ entry:
 define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x i32> %e) {
 ; CHECK-SD-LABEL: v3f64_i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    fcmgt v1.2d, v5.2d, v2.2d
@@ -921,20 +878,16 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x
 ;
 ; CHECK-GI-LABEL: v3f64_i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    fcmp d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
+; CHECK-GI-NEXT:    fcmp d2, d5
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
+; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    cset w9, mi
 ; CHECK-GI-NEXT:    mov v2.s[0], w9
-; CHECK-GI-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    mov v1.s[1], w8
+; CHECK-GI-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-GI-NEXT:    mov v3.s[0], w9
 ; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:    mov v1.s[2], w8

diff  --git a/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll b/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
index b9713b57cef681..07920220452cd2 100644
--- a/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
@@ -96,10 +96,7 @@ define float @copysign32(float %a, float %b) {
 ; CHECK-LABEL: copysign32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mvni v2.4s, #128, lsl #24
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: copysign32:
@@ -119,11 +116,8 @@ define double @copysign64(double %a, double %b) {
 ; CHECK-LABEL: copysign64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg v2.2d, v2.2d
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: copysign64:

diff  --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll
index a42ec8e253be29..67eb8428dc23b3 100644
--- a/llvm/test/CodeGen/AArch64/fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/fcopysign.ll
@@ -6,11 +6,8 @@ define double @copysign_f64(double %a, double %b) {
 ; CHECK-LABEL: copysign_f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg v2.2d, v2.2d
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = call double @llvm.copysign.f64(double %a, double %b)
@@ -21,19 +18,13 @@ define float @copysign_f32(float %a, float %b) {
 ; CHECK-SD-LABEL: copysign_f32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mvni v2.4s, #128, lsl #24
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: copysign_f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mvni v2.2s, #128, lsl #24
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $d0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call float @llvm.copysign.f32(float %a, float %b)
@@ -53,10 +44,7 @@ define half @copysign_f16(half %a, half %b) {
 ; CHECK-GI-LABEL: copysign_f16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mvni v2.4h, #128, lsl #8
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
-; CHECK-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call half @llvm.copysign.f16(half %a, half %b)
@@ -79,42 +67,28 @@ define <3 x double> @copysign_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: copysign_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    movi v6.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fneg v1.2d, v6.2d
 ; CHECK-SD-NEXT:    bif v0.16b, v3.16b, v1.16b
 ; CHECK-SD-NEXT:    bif v2.16b, v5.16b, v1.16b
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: copysign_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    movi v6.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    fneg v1.2d, v6.2d
 ; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x9, x9, #0x8000000000000000
-; CHECK-GI-NEXT:    fneg v1.2d, v6.2d
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v1.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.copysign.v3f64(<3 x double> %a, <3 x double> %b)

diff  --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll
index b408e9c1bd4e60..72d124f8122869 100644
--- a/llvm/test/CodeGen/AArch64/fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt.ll
@@ -66,27 +66,18 @@ entry:
 define <3 x double> @ceil_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: ceil_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintp v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintp v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ceil_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frintp d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frintp d2, d2
 ; CHECK-GI-NEXT:    frintp v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a)
@@ -365,27 +356,18 @@ entry:
 define <3 x double> @floor_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: floor_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintm v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintm v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: floor_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frintm d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frintm d2, d2
 ; CHECK-GI-NEXT:    frintm v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a)
@@ -664,27 +646,18 @@ entry:
 define <3 x double> @nearbyint_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: nearbyint_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frinti v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frinti v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: nearbyint_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frinti d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frinti d2, d2
 ; CHECK-GI-NEXT:    frinti v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a)
@@ -963,27 +936,18 @@ entry:
 define <3 x double> @roundeven_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: roundeven_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintn v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintn v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: roundeven_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frintn d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frintn d2, d2
 ; CHECK-GI-NEXT:    frintn v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a)
@@ -1262,27 +1226,18 @@ entry:
 define <3 x double> @rint_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: rint_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintx v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintx v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: rint_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frintx d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frintx d2, d2
 ; CHECK-GI-NEXT:    frintx v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a)
@@ -1561,27 +1516,18 @@ entry:
 define <3 x double> @round_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: round_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frinta v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frinta v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: round_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frinta d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frinta d2, d2
 ; CHECK-GI-NEXT:    frinta v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.round.v3f64(<3 x double> %a)
@@ -1860,27 +1806,18 @@ entry:
 define <3 x double> @trunc_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: trunc_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintz v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintz v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: trunc_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    frintz d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    frintz d2, d2
 ; CHECK-GI-NEXT:    frintz v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a)

diff  --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 251d7a424175b3..d9651fe63d28bf 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -305,7 +305,6 @@ define <2 x i32> @test4_sat(<2 x double> %d) {
 ; CHECK-NEXT:    fcvtzs w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
   %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)

diff  --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 0627250d077912..ec7d1918caa128 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -100,7 +100,6 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
 define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; CHECK-LABEL: splat_three_fdiv_4xfloat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    fmov v4.4s, #1.00000000
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
@@ -120,7 +119,6 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
 define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
 ; CHECK-LABEL: splat_fdiv_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    fmov v2.4s, #1.00000000
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
@@ -171,9 +169,8 @@ entry:
 define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
 ; CHECK-LABEL: splat_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/fdiv.ll b/llvm/test/CodeGen/AArch64/fdiv.ll
index 5bdccccc62b99c..5897b2235296b6 100644
--- a/llvm/test/CodeGen/AArch64/fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv.ll
@@ -68,34 +68,20 @@ entry:
 define <3 x double> @fdiv_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fdiv_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fdiv v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fdiv v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fdiv_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fdiv d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fdiv d2, d2, d5
 ; CHECK-GI-NEXT:    fdiv v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fdiv <3 x double> %a, %b

diff  --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll
index f13e2fcd1c4483..12ca7b655611c3 100644
--- a/llvm/test/CodeGen/AArch64/fexplog.ll
+++ b/llvm/test/CodeGen/AArch64/fexplog.ll
@@ -69,13 +69,10 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -90,14 +87,11 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -179,22 +173,17 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -215,28 +204,23 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -252,20 +236,15 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -277,16 +256,12 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -308,20 +283,16 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -339,18 +310,14 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -373,27 +340,22 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -414,22 +376,17 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -455,46 +412,37 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -528,41 +476,31 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -741,7 +679,6 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -772,7 +709,6 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -787,7 +723,6 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -817,8 +752,7 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1241,19 +1175,20 @@ define <16 x half> @exp_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -1372,13 +1307,10 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1393,14 +1325,11 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1482,22 +1411,17 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -1518,28 +1442,23 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -1555,20 +1474,15 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1580,16 +1494,12 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -1611,20 +1521,16 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1642,18 +1548,14 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -1676,27 +1578,22 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1717,22 +1614,17 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -1758,46 +1650,37 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -1831,41 +1714,31 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -2044,7 +1917,6 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -2075,7 +1947,6 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2090,7 +1961,6 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -2120,8 +1990,7 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2544,19 +2413,20 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -2675,13 +2545,10 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -2696,14 +2563,11 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -2785,22 +2649,17 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -2821,28 +2680,23 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -2858,20 +2712,15 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2883,16 +2732,12 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -2914,20 +2759,16 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -2945,18 +2786,14 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -2979,27 +2816,22 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -3020,22 +2852,17 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -3061,46 +2888,37 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -3134,41 +2952,31 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -3347,7 +3155,6 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -3378,7 +3185,6 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3393,7 +3199,6 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -3423,8 +3228,7 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -3847,19 +3651,20 @@ define <16 x half> @log_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -3978,13 +3783,10 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -3999,14 +3801,11 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -4088,22 +3887,17 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -4124,28 +3918,23 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -4161,20 +3950,15 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4186,16 +3970,12 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -4217,20 +3997,16 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -4248,18 +4024,14 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -4282,27 +4054,22 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -4323,22 +4090,17 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -4364,46 +4126,37 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -4437,41 +4190,31 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -4650,7 +4393,6 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -4681,7 +4423,6 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4696,7 +4437,6 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -4726,8 +4466,7 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -5150,19 +4889,20 @@ define <16 x half> @log2_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -5281,13 +5021,10 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -5302,14 +5039,11 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -5391,22 +5125,17 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -5427,28 +5156,23 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -5464,20 +5188,15 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5489,16 +5208,12 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -5520,20 +5235,16 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -5551,18 +5262,14 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -5585,27 +5292,22 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -5626,22 +5328,17 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -5667,46 +5364,37 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -5740,41 +5428,31 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -5953,7 +5631,6 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -5984,7 +5661,6 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5999,7 +5675,6 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -6029,8 +5704,7 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -6453,19 +6127,20 @@ define <16 x half> @log10_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
index ab7c2afbbf871f..6e189c1206c15f 100644
--- a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
@@ -106,7 +106,6 @@ define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
 define float @l0_extract_f_v2s(<2 x i32> %u) {
 ; CHECK-LABEL: l0_extract_f_v2s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf s0, s0
 ; CHECK-NEXT:    ret
   %i = extractelement <2 x i32> %u, i64 0
@@ -118,7 +117,6 @@ define float @l0_extract_f_v2s(<2 x i32> %u) {
 define float @l0_extract_f_v2u(<2 x i32> %u) {
 ; CHECK-LABEL: l0_extract_f_v2u:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf s0, s0
 ; CHECK-NEXT:    ret
   %i = extractelement <2 x i32> %u, i64 0
@@ -247,7 +245,6 @@ define half @ln_extract_h_v8u(<8 x i16> %u, i32 %n) #0 {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-NEXT:    ldrh w8, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 4ab5db450a7f32..02f60f5aec33ab 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -5,13 +5,10 @@
 define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
 ; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v2.2s, v0.s[1]
 ; CHECK-SD-NEXT:    mov v1.16b, v2.16b
 ; CHECK-SD-NEXT:    mov v1.h[0], v0.h[1]
 ; CHECK-SD-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
@@ -29,7 +26,6 @@ define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v0.8h
 ; CHECK-NEXT:    uzp2 v1.8h, v0.8h, v0.8h
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    fmov d0, d2
 ; CHECK-NEXT:    ret
   %retval = call {<4 x half>, <4 x half>} @llvm.vector.deinterleave2.v8f16(<8 x half> %vec)
@@ -60,7 +56,6 @@ define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    uzp1 v2.4s, v0.4s, v0.4s
 ; CHECK-GI-NEXT:    uzp2 v1.4s, v0.4s, v0.4s
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    fmov d0, d2
 ; CHECK-GI-NEXT:    ret
   %retval = call {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float> %vec)

diff  --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index a9618fdc2dec30..961df36b11305d 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -14,18 +14,14 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
 ; CHECK-SD-LABEL: interleave2_v8f16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: interleave2_v8f16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    zip1 v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
   %retval = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
@@ -46,8 +42,6 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
 define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
 ; CHECK-SD-LABEL: interleave2_v4f32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    rev64 v1.4s, v0.4s
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
@@ -55,8 +49,6 @@ define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
 ;
 ; CHECK-GI-LABEL: interleave2_v4f32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    zip1 v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    ret
   %retval = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)

diff  --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
index 86c1474068482e..c50c00ab4d1a3c 100644
--- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
@@ -9,8 +9,6 @@ declare <2 x fp128> @llvm.maximum.v2f128(<2 x fp128>, <2 x fp128>)
 define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: fmaximum_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h2, v1.h[1]
 ; CHECK-NEXT:    mov h3, v0.h[1]
 ; CHECK-NEXT:    mov h4, v1.h[2]
@@ -36,7 +34,6 @@ define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    fcvt h1, s1
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %x, <4 x half> %y)
   ret <4 x half> %r

diff  --git a/llvm/test/CodeGen/AArch64/fminimummaximum.ll b/llvm/test/CodeGen/AArch64/fminimummaximum.ll
index fb12f8acf17453..2f608d77bf3893 100644
--- a/llvm/test/CodeGen/AArch64/fminimummaximum.ll
+++ b/llvm/test/CodeGen/AArch64/fminimummaximum.ll
@@ -129,34 +129,20 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: min_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmin v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmin v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmin d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmin d2, d2, d5
 ; CHECK-GI-NEXT:    fmin v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minimum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -166,34 +152,20 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: max_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmax v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmax v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmax d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmax d2, d2, d5
 ; CHECK-GI-NEXT:    fmax v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maximum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -297,76 +269,56 @@ entry:
 define <7 x float> @min_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: min_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    fmin v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmin v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.minimum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -376,76 +328,56 @@ entry:
 define <7 x float> @max_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: max_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    fmax v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmax v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmax v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmax v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.maximum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -489,8 +421,6 @@ entry:
 define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: min_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -516,7 +446,6 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: min_v4f16:
@@ -544,8 +473,6 @@ entry:
 define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: max_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -571,7 +498,6 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: max_v4f16:

diff  --git a/llvm/test/CodeGen/AArch64/fminmax.ll b/llvm/test/CodeGen/AArch64/fminmax.ll
index 64f0da8b4cd0f9..6642ea37374fa9 100644
--- a/llvm/test/CodeGen/AArch64/fminmax.ll
+++ b/llvm/test/CodeGen/AArch64/fminmax.ll
@@ -129,34 +129,20 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: min_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fminnm v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fminnm d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fminnm d2, d2, d5
 ; CHECK-GI-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -166,34 +152,20 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: max_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmaxnm v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmaxnm d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmaxnm d2, d2, d5
 ; CHECK-GI-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maxnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -297,76 +269,56 @@ entry:
 define <7 x float> @min_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: min_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    fminnm v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fminnm v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fminnm v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fminnm v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.minnum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -376,76 +328,56 @@ entry:
 define <7 x float> @max_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: max_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
 ; CHECK-SD-NEXT:    fmaxnm v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmaxnm v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmaxnm v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmaxnm v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.maxnum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -489,8 +421,6 @@ entry:
 define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: min_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -516,7 +446,6 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: min_v4f16:
@@ -544,8 +473,6 @@ entry:
 define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: max_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -571,7 +498,6 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: max_v4f16:

diff  --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll
index 7bcaae5a77eac5..0c1ed2c07278dd 100644
--- a/llvm/test/CodeGen/AArch64/fmla.ll
+++ b/llvm/test/CodeGen/AArch64/fmla.ll
@@ -71,34 +71,19 @@ entry:
 define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fma_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fma_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
@@ -295,9 +280,6 @@ entry:
 define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-SD-NOFP16-LABEL: fma_v4f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v2.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[1]
@@ -330,7 +312,6 @@ define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v3.h[0]
 ; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fma_v4f16:
@@ -722,34 +703,19 @@ entry:
 define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fmuladd_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmuladd_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
@@ -1170,34 +1136,19 @@ entry:
 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fmul_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d6
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmul_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]

diff  --git a/llvm/test/CodeGen/AArch64/fmul.ll b/llvm/test/CodeGen/AArch64/fmul.ll
index bd3d1353e643e5..945bd5209bf4c4 100644
--- a/llvm/test/CodeGen/AArch64/fmul.ll
+++ b/llvm/test/CodeGen/AArch64/fmul.ll
@@ -68,34 +68,20 @@ entry:
 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fmul_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmul v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmul v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmul_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmul d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmul d2, d2, d5
 ; CHECK-GI-NEXT:    fmul v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fmul <3 x double> %a, %b

diff  --git a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll
index de2671afe60ab7..35d91a5bb85a2e 100644
--- a/llvm/test/CodeGen/AArch64/fneg.ll
+++ b/llvm/test/CodeGen/AArch64/fneg.ll
@@ -27,11 +27,9 @@ entry:
 define half @fabs_f16(half %a) {
 ; CHECK-SD-NOFP16-LABEL: fabs_f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-SD-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-SD-NOFP16-NEXT:    eor w8, w8, #0x8000
 ; CHECK-SD-NOFP16-NEXT:    fmov s0, w8
-; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fabs_f16:
@@ -41,11 +39,9 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-GI-NOFP16-NEXT:    eor w8, w8, #0xffff8000
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -70,27 +66,18 @@ entry:
 define <3 x double> @fabs_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fabs_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fneg v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fabs_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    fneg d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fneg d2, d2
 ; CHECK-GI-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fneg <3 x double> %a

diff  --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index a78addc490086d..f8d1577b5e1fa0 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -52,13 +52,11 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    fmov w0, s1
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -66,7 +64,6 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEON-NEXT:    mov w0, v0.s[3]
@@ -74,7 +71,6 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEON-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEON-NEXT:    mov v0.16b, v1.16b
@@ -224,7 +220,6 @@ define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    fmov d1, #3.00000000
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    lsl w9, w9, w0
@@ -433,7 +428,6 @@ define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
 ; CHECK-LABEL: fmul_pow_shl_cnt_safe:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0xffff
 ; CHECK-NEXT:    ucvtf d0, w8
@@ -541,7 +535,6 @@ define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
 ; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x8, x8, x0, lsl #52
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
@@ -585,7 +578,6 @@ define fastcc i1 @quantum_hadamard(i32 %0) {
 ; CHECK-LABEL: quantum_hadamard:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x8, x8, x0, lsl #52
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fcvt s0, d0

diff  --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index d9d80f1cb50ee1..40efc7389b04d2 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
 ; CHECK-NEXT:    ldp q2, q1, [x9]
-; CHECK-NEXT:    fcvtzu.4s v4, v1
-; CHECK-NEXT:    fcvtzu.4s v3, v2
-; CHECK-NEXT:    tbl.16b v1, { v3, v4 }, v0
+; CHECK-NEXT:    fcvtzu.4s v3, v1
+; CHECK-NEXT:    fcvtzu.4s v2, v2
+; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v0
 ; CHECK-NEXT:    str d1, [x1], #16
 ; CHECK-NEXT:    b.eq LBB0_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
@@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
 ; CHECK-NEXT:    ldp q2, q1, [x9, #32]
-; CHECK-NEXT:    fcvtzu.4s v7, v1
+; CHECK-NEXT:    fcvtzu.4s v5, v1
 ; CHECK-NEXT:    ldp q1, q3, [x9]
-; CHECK-NEXT:    fcvtzu.4s v6, v2
-; CHECK-NEXT:    fcvtzu.4s v5, v3
-; CHECK-NEXT:    fcvtzu.4s v4, v1
-; CHECK-NEXT:    tbl.16b v1, { v4, v5, v6, v7 }, v0
+; CHECK-NEXT:    fcvtzu.4s v4, v2
+; CHECK-NEXT:    fcvtzu.4s v3, v3
+; CHECK-NEXT:    fcvtzu.4s v2, v1
+; CHECK-NEXT:    tbl.16b v1, { v2, v3, v4, v5 }, v0
 ; CHECK-NEXT:    str q1, [x1], #32
 ; CHECK-NEXT:    b.eq LBB4_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
@@ -316,20 +316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
 ; CHECK-NEXT:    ldp q3, q4, [x9, #32]
 ; CHECK-NEXT:    ldp q5, q6, [x10]
 ; CHECK-NEXT:    fcvtzu.4s v19, v1
+; CHECK-NEXT:    ldp q7, q1, [x9]
+; CHECK-NEXT:    fcvtzu.4s v4, v4
 ; CHECK-NEXT:    fcvtzu.4s v18, v2
-; CHECK-NEXT:    ldp q2, q1, [x9]
-; CHECK-NEXT:    fcvtzu.4s v23, v4
+; CHECK-NEXT:    fcvtzu.4s v3, v3
 ; CHECK-NEXT:    fcvtzu.4s v17, v6
-; CHECK-NEXT:    add x9, x2, x8, lsl #5
-; CHECK-NEXT:    fcvtzu.4s v22, v3
 ; CHECK-NEXT:    fcvtzu.4s v16, v5
+; CHECK-NEXT:    add x9, x2, x8, lsl #5
+; CHECK-NEXT:    fcvtzu.4s v2, v1
+; CHECK-NEXT:    fcvtzu.4s v1, v7
 ; CHECK-NEXT:    add x8, x8, #1
-; CHECK-NEXT:    fcvtzu.4s v21, v1
 ; CHECK-NEXT:    cmp x8, #1000
-; CHECK-NEXT:    fcvtzu.4s v20, v2
-; CHECK-NEXT:    tbl.16b v1, { v16, v17, v18, v19 }, v0
-; CHECK-NEXT:    tbl.16b v2, { v20, v21, v22, v23 }, v0
-; CHECK-NEXT:    stp q2, q1, [x9]
+; CHECK-NEXT:    tbl.16b v5, { v16, v17, v18, v19 }, v0
+; CHECK-NEXT:    tbl.16b v1, { v1, v2, v3, v4 }, v0
+; CHECK-NEXT:    stp q1, q5, [x9]
 ; CHECK-NEXT:    b.eq LBB5_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index 83e60c10897624..97f092d5bf9e84 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -702,7 +702,6 @@ define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
 define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 ; CHECK-LABEL: sitofp_v1f64_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
@@ -713,7 +712,6 @@ define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 ; CHECK-LABEL: uitofp_v1f64_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    ucvtf d0, w8
 ; CHECK-NEXT:    ret
@@ -724,7 +722,6 @@ define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; CHECK-LABEL: sitofp_v1f64_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    scvtf d0, x8
 ; CHECK-NEXT:    ret
@@ -735,7 +732,6 @@ define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 define <1 x double> @uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; CHECK-LABEL: uitofp_v1f64_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    ucvtf d0, x8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index bb3f9a3e52a16b..53afad929622fa 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -25,20 +25,11 @@ entry:
 define <3 x double> @max_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: max_nnan_v3f64:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
-; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
-; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    fmaxnm v2.2d, v2.2d, v5.2d
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -100,29 +91,19 @@ entry:
 define <5 x float> @max_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: max_nnan_v5f32:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
-; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
-; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
-; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
-; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
-; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
-; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
-; AARCH64-NEXT:    mov x8, sp
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
 ; AARCH64-NEXT:    fmaxnm v4.4s, v4.4s, v2.4s
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
 ; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
 ; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v5.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -184,17 +165,9 @@ entry:
 define <9 x half> @max_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: max_nnan_v9f16:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
-; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
-; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
-; AARCH64-NEXT:    add x9, sp, #16
-; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
-; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
-; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
-; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
-; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -261,20 +234,11 @@ entry:
 define <3 x double> @min_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: min_nnan_v3f64:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
-; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
-; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v5.2d
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -336,29 +300,19 @@ entry:
 define <5 x float> @min_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: min_nnan_v5f32:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
-; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
-; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
-; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
-; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
-; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
-; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
-; AARCH64-NEXT:    mov x8, sp
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
 ; AARCH64-NEXT:    fminnm v4.4s, v4.4s, v2.4s
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
 ; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v5.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <5 x float> @llvm.minimumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -420,17 +374,9 @@ entry:
 define <9 x half> @min_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: min_nnan_v9f16:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
-; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
-; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
-; AARCH64-NEXT:    add x9, sp, #16
-; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
-; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
-; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
-; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
-; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -501,12 +447,6 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: max_v3f64:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
-; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
-; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v2.2d
@@ -516,9 +456,6 @@ define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-NEXT:    fminnm v1.2d, v5.2d, v5.2d
 ; AARCH64-NEXT:    fmaxnm v2.2d, v2.2d, v1.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -592,17 +529,9 @@ entry:
 define <5 x float> @max_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: max_v5f32:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
-; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
-; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
-; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
-; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
-; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
-; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
-; AARCH64-NEXT:    mov x8, sp
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
@@ -613,12 +542,10 @@ define <5 x float> @max_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-NEXT:    fminnm v1.4s, v5.4s, v5.4s
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v0.4s
 ; AARCH64-NEXT:    fmaxnm v4.4s, v3.4s, v2.4s
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -692,17 +619,9 @@ entry:
 define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: max_v9f16:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
-; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
-; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
-; AARCH64-NEXT:    add x9, sp, #16
-; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
-; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
-; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
-; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
-; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -781,12 +700,6 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: min_v3f64:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
-; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
-; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v2.2d
@@ -796,9 +709,6 @@ define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-NEXT:    fminnm v1.2d, v5.2d, v5.2d
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v1.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -872,17 +782,9 @@ entry:
 define <5 x float> @min_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: min_v5f32:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
-; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
-; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
-; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
-; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
-; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
-; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
-; AARCH64-NEXT:    mov x8, sp
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
+; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
@@ -893,12 +795,10 @@ define <5 x float> @min_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-NEXT:    fminnm v1.4s, v5.4s, v5.4s
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v0.4s
 ; AARCH64-NEXT:    fminnm v4.4s, v3.4s, v2.4s
-; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
-; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <5 x float> @llvm.minimumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -972,17 +872,9 @@ entry:
 define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: min_v9f16:
 ; AARCH64:       // %bb.0: // %entry
-; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
-; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
-; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
-; AARCH64-NEXT:    add x9, sp, #16
-; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
-; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
-; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
-; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
-; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
+; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index d4130e7a848b15..a0b67f11c485fd 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -427,7 +427,6 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
 define void @test_insert_at_zero(half %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <8 x half> undef, half %a, i64 0

diff  --git a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
index 97c3a4937cda74..6aee101ebd2638 100644
--- a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
@@ -41,10 +41,7 @@ entry:
 define <4 x half> @lane_64_64(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: lane_64_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[2]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
@@ -57,7 +54,6 @@ entry:
 define <8 x half> @lane_128_64(<8 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: lane_128_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -75,9 +71,7 @@ entry:
 define <4 x half> @lane_64_128(<4 x half> %a, <8 x half> %b) #0 {
 ; CHECK-LABEL: lane_64_128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[5]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <8 x half> %b to <8 x i16>
@@ -194,7 +188,6 @@ entry:
 define <4 x half> @dup_64(half %a) #0 {
 ; CHECK-LABEL: dup_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -209,7 +202,6 @@ entry:
 define <8 x half> @dup_128(half %a) #0 {
 ; CHECK-LABEL: dup_128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -228,7 +220,6 @@ entry:
 define <4 x half> @dup_lane_64(<4 x half> %a) #0 {
 ; CHECK-LABEL: dup_lane_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -240,7 +231,6 @@ entry:
 define <8 x half> @dup_lane_128(<4 x half> %a) #0 {
 ; CHECK-LABEL: dup_lane_128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -274,8 +264,6 @@ entry:
 define <8 x half> @vcombine(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: vcombine:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -288,7 +276,6 @@ define <4 x half> @get_high(<8 x half> %a) #0 {
 ; CHECK-LABEL: get_high:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -300,7 +287,6 @@ entry:
 define <4 x half> @get_low(<8 x half> %a) #0 {
 ; CHECK-LABEL: get_low:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -311,11 +297,8 @@ entry:
 define <4 x half> @set_lane_64(<4 x half> %a, half %b) #0 {
 ; CHECK-LABEL: set_lane_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[2], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast half %b to i16
@@ -330,7 +313,6 @@ entry:
 define <8 x half> @set_lane_128(<8 x half> %a, half %b) #0 {
 ; CHECK-LABEL: set_lane_128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    ret
@@ -346,10 +328,8 @@ entry:
 define half @get_lane_64(<4 x half> %a) #0 {
 ; CHECK-LABEL: get_lane_64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <4 x half> %a to <4 x i16>
@@ -364,7 +344,6 @@ define half @get_lane_128(<8 x half> %a) #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <8 x half> %a to <8 x i16>

diff  --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
index 368683e2b93af4..8e1d67b750743a 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -11,7 +11,6 @@ declare half @llvm.fma.f16(half, half, half) #1
 define <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfma_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -23,7 +22,6 @@ entry:
 define <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmaq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -57,7 +55,6 @@ entry:
 define <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfma_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -70,7 +67,6 @@ entry:
 define <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfmaq_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -83,7 +79,6 @@ entry:
 define half @t_vfmah_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd h0, h1, h2, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -95,7 +90,6 @@ entry:
 define half @t_vfmah_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0_swap:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -107,7 +101,6 @@ entry:
 define half @t_vfmah_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -119,7 +112,6 @@ entry:
 define half @t_vfmah_lane_f16_3_0(half %a, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane_f16_3_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla h0, h1, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -132,8 +124,6 @@ entry:
 define half @t_vfmah_lane_f16_0_0(half %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmadd h0, h1, h2, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -179,7 +169,6 @@ entry:
 define <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfms_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -192,7 +181,6 @@ entry:
 define <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -229,7 +217,6 @@ entry:
 define <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfms_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -243,7 +230,6 @@ entry:
 define <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfmsq_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -257,7 +243,6 @@ entry:
 define half @t_vfmsh_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -270,7 +255,6 @@ entry:
 define half @t_vfmsh_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -283,7 +267,6 @@ entry:
 define half @t_vfmsh_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -308,7 +291,6 @@ entry:
 define half @t_vfmsh_lane_f16_0_3(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls h0, h1, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -368,7 +350,6 @@ entry:
 define half @t_vmulh_lane0_f16(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vmulh_lane0_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h0, h0, h1
 ; CHECK-NEXT:    ret
 entry:
@@ -380,7 +361,6 @@ entry:
 define half @t_vmulh_lane3_f16(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vmulh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h0, h0, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -424,7 +404,6 @@ entry:
 define half @t_vmulxh_lane0_f16(half %a, <4 x half> %b) {
 ; CHECK-LABEL: t_vmulxh_lane0_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx h0, h0, h1
 ; CHECK-NEXT:    ret
 entry:
@@ -436,7 +415,6 @@ entry:
 define half @t_vmulxh_lane3_f16(half %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulxh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -448,7 +426,6 @@ entry:
 define <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulx_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -460,7 +437,6 @@ entry:
 define <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulxq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -516,7 +492,6 @@ entry:
 define <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
 ; CHECK-LABEL: t_vmulx_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    dup v1.4h, v1.h[0]
 ; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -530,7 +505,6 @@ entry:
 define <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
 ; CHECK-LABEL: t_vmulxq_n_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    dup v1.8h, v1.h[0]
 ; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
@@ -544,7 +518,6 @@ entry:
 define half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -567,7 +540,6 @@ entry:
 define half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmsh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -605,7 +577,6 @@ entry:
 define half @test_fmulx_horizontal_f16(<2 x half> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmulx h0, h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
index 2ffba10e211007..ccea80cd396bc3 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
@@ -7,8 +7,6 @@ target triple = "aarch64-linux"
 define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
 ; CHECK-LABEL: cvtn_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
@@ -18,8 +16,6 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf
 define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
 ; CHECK-LABEL: cvtn_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
@@ -29,8 +25,6 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half>
 define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
 ; CHECK-LABEL: cvtnb_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtnb z0.b, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)

diff  --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 9157bcba59e9bb..85c9a0cd881322 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -28,7 +28,6 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    fcvtzu w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -353,7 +352,6 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -385,7 +383,6 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -447,14 +444,12 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -485,9 +480,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -526,7 +519,6 @@ entry:
 define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i64:
 ; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvt s1, h1
@@ -538,7 +530,6 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ;
 ; CHECK-FP16-LABEL: stest_f16i64:
 ; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-FP16-NEXT:    fcvtzs x9, h1
@@ -565,14 +556,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -603,9 +592,7 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixhfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -667,7 +654,6 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    fcvtzu w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -962,7 +948,6 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -993,7 +978,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -1049,14 +1033,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -1086,9 +1068,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -1123,7 +1103,6 @@ entry:
 define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i64_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvt s1, h1
@@ -1135,7 +1114,6 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ;
 ; CHECK-FP16-LABEL: stest_f16i64_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-FP16-NEXT:    fcvtzs x9, h1
@@ -1160,14 +1138,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -1197,9 +1173,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixhfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0

diff  --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index df90f9d5f09109..2715f3d2f26b6a 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -74,10 +74,8 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v3.2d, v0.2s
 ; CHECK-SD-NEXT:    fcvtl2 v2.2d, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fpext_v3f32_v3f64:
@@ -86,7 +84,6 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-GI-NEXT:    fcvt d2, s1
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fpext <3 x float> %a to <3 x double>
@@ -100,9 +97,7 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendhftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -137,11 +132,9 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendhftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -175,7 +168,6 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -184,7 +176,6 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -211,7 +202,6 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendsftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -244,7 +234,6 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -252,7 +241,6 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -277,14 +265,12 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
@@ -328,7 +314,6 @@ define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fpext_v2f16_v2f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    fcvt d0, h0
 ; CHECK-GI-NEXT:    fcvt d1, h1
@@ -345,15 +330,11 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) {
 ; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtl v0.2d, v1.2s
 ; CHECK-SD-NEXT:    fcvtl2 v2.2d, v1.4s
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fpext_v3f16_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NEXT:    fcvt d0, h0
@@ -375,7 +356,6 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fpext_v4f16_v4f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NEXT:    mov h3, v0.h[3]
@@ -395,7 +375,6 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
 ; CHECK-LABEL: fpext_v2f16_v2f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = fpext <2 x half> %a to <2 x float>

diff  --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll
index b185d9e0494111..509bd5ca1d749a 100644
--- a/llvm/test/CodeGen/AArch64/fpmode.ll
+++ b/llvm/test/CodeGen/AArch64/fpmode.ll
@@ -10,7 +10,6 @@ define i32 @func_get_fpmode() #0 {
 ; DAG-LABEL: func_get_fpmode:
 ; DAG:       // %bb.0: // %entry
 ; DAG-NEXT:    mrs x0, FPCR
-; DAG-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; DAG-NEXT:    ret
 ;
 ; GIS-LABEL: func_get_fpmode:

diff  --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
index dc93d5be9b3f38..4eeeb4d8fafeec 100644
--- a/llvm/test/CodeGen/AArch64/fpow.ll
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -72,14 +72,10 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #64
@@ -96,16 +92,12 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d9
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d9
 ; CHECK-GI-NEXT:    bl pow
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -206,25 +198,18 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl pow
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp, #16] // 32-byte Folded Reload
@@ -252,35 +237,28 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d10, v2.d[1]
 ; CHECK-GI-NEXT:    mov d11, v3.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d10
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d10
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d11
 ; CHECK-GI-NEXT:    fmov d0, d9
+; CHECK-GI-NEXT:    fmov d1, d11
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
@@ -296,23 +274,16 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -325,20 +296,14 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s9
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s9
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -361,14 +326,10 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -376,7 +337,6 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -397,23 +357,18 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s10, v1.s[1]
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s10
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s10
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -438,14 +393,10 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -453,7 +404,6 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -461,7 +411,6 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -487,28 +436,22 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s12
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s12
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
@@ -537,14 +480,10 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -552,7 +491,6 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -560,21 +498,16 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -582,7 +515,6 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -620,7 +552,6 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v3.s[1]
 ; CHECK-GI-NEXT:    mov s11, v3.s[2]
 ; CHECK-GI-NEXT:    mov s2, v4.s[1]
@@ -629,50 +560,39 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s14, v1.s[1]
 ; CHECK-GI-NEXT:    mov s15, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    str s2, [sp, #48] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v4.s[2]
 ; CHECK-GI-NEXT:    str s2, [sp, #112] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s14
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s14
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s15
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s15
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s12
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s11
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
@@ -894,8 +814,6 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
@@ -935,7 +853,6 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -954,8 +871,6 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    .cfi_offset b12, -56
 ; CHECK-GI-NEXT:    .cfi_offset b13, -64
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -993,8 +908,7 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1558,22 +1472,23 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
 ; CHECK-GI-NEXT:    fcvt s1, h0
 ; CHECK-GI-NEXT:    fmov s0, s2
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    ldr q3, [sp, #304] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #320] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #128] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fpowi.ll b/llvm/test/CodeGen/AArch64/fpowi.ll
index 8948556d1b380a..f5def57c1cfb90 100644
--- a/llvm/test/CodeGen/AArch64/fpowi.ll
+++ b/llvm/test/CodeGen/AArch64/fpowi.ll
@@ -71,15 +71,12 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powidf2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
@@ -95,15 +92,12 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov w19, w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powidf2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -195,25 +189,20 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powidf2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl __powidf2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -236,31 +225,26 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    mov w19, w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov d0, d9
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -277,22 +261,17 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
+; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -305,18 +284,14 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w19, -8
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov w19, w0
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -340,14 +315,11 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -355,7 +327,6 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -375,20 +346,16 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov w19, w0
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -413,14 +380,11 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -428,7 +392,6 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -436,7 +399,6 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -459,25 +421,20 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov w19, w0
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -505,14 +462,11 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -520,7 +474,6 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -528,21 +481,17 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -550,7 +499,6 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -589,46 +537,36 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov w19, w0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s11
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s12
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    fmov s0, s13
+; CHECK-GI-NEXT:    mov w0, w19
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #32] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #160] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
@@ -825,7 +763,6 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -860,7 +797,6 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -876,7 +812,6 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    .cfi_offset b10, -48
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov w19, w0
@@ -910,8 +845,7 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1387,20 +1321,21 @@ define <16 x half> @powi_v16f16(<16 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x29, [sp, #304] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #320] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 9c4f0207b84ce8..5df93979877ecf 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -707,29 +707,18 @@ entry:
 define <3 x i64> @fptos_v3f64_v3i64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v3f64_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v3f64_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i64>
@@ -739,29 +728,18 @@ entry:
 define <3 x i64> @fptou_v3f64_v3i64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i64>
@@ -1017,9 +995,6 @@ entry:
 define <3 x i32> @fptos_v3f64_v3i32(<3 x double> %a) {
 ; CHECK-LABEL: fptos_v3f64_v3i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1033,9 +1008,6 @@ entry:
 define <3 x i32> @fptou_v3f64_v3i32(<3 x double> %a) {
 ; CHECK-LABEL: fptou_v3f64_v3i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
@@ -1377,9 +1349,6 @@ entry:
 define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) {
 ; CHECK-LABEL: fptos_v3f64_v3i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1394,9 +1363,6 @@ entry:
 define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1406,9 +1372,6 @@ define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
@@ -1470,12 +1433,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI70_0
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
-; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI70_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI70_0]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v8f64_v8i16:
@@ -1501,12 +1464,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI71_0
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
-; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI71_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI71_0]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v8f64_v8i16:
@@ -1532,21 +1495,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI72_0
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI72_0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI72_0]
-; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v16f64_v16i16:
@@ -1579,21 +1542,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI73_0
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI73_0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI73_0]
-; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v16f64_v16i16:
@@ -1621,65 +1584,48 @@ entry:
 define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v32f64_v32i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-SD-NEXT:    .cfi_offset b8, -8
-; CHECK-SD-NEXT:    .cfi_offset b9, -16
-; CHECK-SD-NEXT:    .cfi_offset b10, -24
-; CHECK-SD-NEXT:    .cfi_offset b11, -32
-; CHECK-SD-NEXT:    .cfi_offset b12, -40
-; CHECK-SD-NEXT:    .cfi_offset b13, -48
-; CHECK-SD-NEXT:    .cfi_offset b14, -56
-; CHECK-SD-NEXT:    .cfi_offset b15, -64
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #64]
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
-; CHECK-SD-NEXT:    adrp x8, .LCPI74_0
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
-; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
-; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp]
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
 ; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
-; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
-; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
-; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
-; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
-; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
-; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
 ; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
-; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
-; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
-; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
-; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
-; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
-; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI74_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
-; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
-; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
-; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
-; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI74_0
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v25.2s, v19.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v24.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v23.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v16.2d
+; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI74_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v32f64_v32i16:
@@ -1725,65 +1671,48 @@ entry:
 define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v32f64_v32i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-SD-NEXT:    .cfi_offset b8, -8
-; CHECK-SD-NEXT:    .cfi_offset b9, -16
-; CHECK-SD-NEXT:    .cfi_offset b10, -24
-; CHECK-SD-NEXT:    .cfi_offset b11, -32
-; CHECK-SD-NEXT:    .cfi_offset b12, -40
-; CHECK-SD-NEXT:    .cfi_offset b13, -48
-; CHECK-SD-NEXT:    .cfi_offset b14, -56
-; CHECK-SD-NEXT:    .cfi_offset b15, -64
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #64]
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
-; CHECK-SD-NEXT:    adrp x8, .LCPI75_0
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
-; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
-; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
+; CHECK-SD-NEXT:    ldp q18, q19, [sp, #96]
+; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
+; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q22, q23, [sp]
+; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
 ; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
-; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
-; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
-; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
-; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
-; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
-; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
 ; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
-; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
-; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
-; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
-; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
-; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
-; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
-; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI75_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
-; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
-; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
-; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
-; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI75_0
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v25.2s, v19.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v24.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v19.2s, v23.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v16.2d
+; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI75_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v32f64_v32i16:
@@ -1857,9 +1786,6 @@ entry:
 define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v3f64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1872,19 +1798,13 @@ define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptos_v3f64_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fmov x2, d1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    mov d2, v0.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-GI-NEXT:    fmov x1, d2
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i8>
@@ -1894,9 +1814,6 @@ entry:
 define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1909,19 +1826,13 @@ define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fmov x2, d1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    mov d2, v0.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-GI-NEXT:    fmov x1, d2
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i8>
@@ -2287,7 +2198,6 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -2314,7 +2224,6 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -2344,7 +2253,6 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -2371,7 +2279,6 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -2577,10 +2484,8 @@ define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v0.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v3f32_v3i64:
@@ -2591,8 +2496,6 @@ define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzs v2.2d, v1.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x float> %a to <3 x i64>
@@ -2606,10 +2509,8 @@ define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
 ; CHECK-SD-NEXT:    fcvtzu v3.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzu v2.2d, v0.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v3f32_v3i64:
@@ -2620,8 +2521,6 @@ define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzu v2.2d, v1.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x float> %a to <3 x i64>
@@ -3666,9 +3565,7 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -3694,9 +3591,7 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w20, -16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3725,9 +3620,7 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -3753,9 +3646,7 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w20, -16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3789,12 +3680,10 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
@@ -3829,7 +3718,6 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -64
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3870,12 +3758,10 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
@@ -3910,7 +3796,6 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -64
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3939,7 +3824,6 @@ entry:
 define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -3951,7 +3835,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-SD-FP16-NEXT:    fcvtzs x9, h1
@@ -3968,7 +3851,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -3983,7 +3865,6 @@ entry:
 define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -3995,7 +3876,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
 ; CHECK-SD-FP16-NEXT:    fcvtzu x9, h1
@@ -4012,7 +3892,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -4027,7 +3906,6 @@ entry:
 define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
@@ -4043,7 +3921,6 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
@@ -4061,14 +3938,11 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v1.2d
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h0
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
@@ -4076,12 +3950,10 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h3
-; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <3 x half> %a to <3 x i64>
@@ -4091,7 +3963,6 @@ entry:
 define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
@@ -4107,7 +3978,6 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
@@ -4125,14 +3995,11 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v1.2d
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h0
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
@@ -4140,12 +4007,10 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h3
-; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <3 x half> %a to <3 x i64>
@@ -4155,7 +4020,6 @@ entry:
 define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
@@ -4175,7 +4039,6 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -4200,7 +4063,6 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -4221,7 +4083,6 @@ entry:
 define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
@@ -4241,7 +4102,6 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -4266,7 +4126,6 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -5697,7 +5556,6 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v2f16_v2i32:
@@ -5715,7 +5573,6 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v2f16_v2i32:
@@ -5967,7 +5824,6 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16:
@@ -5980,7 +5836,6 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i16>
@@ -5992,7 +5847,6 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16:
@@ -6005,7 +5859,6 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i16>
@@ -6427,7 +6280,6 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8:
@@ -6440,7 +6292,6 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i8>
@@ -6452,7 +6303,6 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8:
@@ -6465,7 +6315,6 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i8>
@@ -6950,9 +6799,7 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -6970,7 +6817,6 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -6982,7 +6828,6 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    fcvtzs x2, h1
@@ -7004,9 +6849,7 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -7024,11 +6867,10 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    mov x1, xzr
 ; CHECK-GI-NOFP16-NEXT:    mov x3, xzr
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu x0, s0
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu x2, s1
@@ -7036,7 +6878,6 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -7061,9 +6902,7 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -7089,7 +6928,6 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
@@ -7105,7 +6943,6 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
@@ -7133,9 +6970,7 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -7161,7 +6996,6 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NOFP16-NEXT:    mov x1, xzr
@@ -7177,7 +7011,6 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -7302,7 +7135,6 @@ define <3 x i64> @fptos_v3f128_v3i64(<3 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    fmov d1, x0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7359,7 +7191,6 @@ define <3 x i64> @fptou_v3f128_v3i64(<3 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    fmov d1, x0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7408,7 +7239,6 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7427,7 +7257,6 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7451,7 +7280,6 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7470,7 +7298,6 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7600,7 +7427,6 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7619,7 +7445,6 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7643,7 +7468,6 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7662,7 +7486,6 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7719,7 +7542,6 @@ define <3 x i16> @fptos_v3f128_v3i16(<3 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w20
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[2], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7776,7 +7598,6 @@ define <3 x i16> @fptou_v3f128_v3i16(<3 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w20
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[2], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7800,7 +7621,6 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7819,7 +7639,6 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7843,7 +7662,6 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7862,7 +7680,6 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 9ef6d61c350ecf..a79c2df4e1970a 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -32,7 +32,6 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzs w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -68,11 +67,6 @@ define <4 x i32> @test_signed_v4f32_v4i32(<4 x float> %f) {
 define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v5f32_v5i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -87,11 +81,6 @@ define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v5f32_v5i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.4s, v4.4s
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -113,12 +102,6 @@ define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v6f32_v6i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -135,12 +118,6 @@ define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v6f32_v6i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -165,13 +142,6 @@ define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 define <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v7f32_v7i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -190,13 +160,6 @@ define <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v7f32_v7i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -253,7 +216,6 @@ define <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzs w8, d0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
@@ -267,7 +229,6 @@ define <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzs w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i32:
@@ -302,12 +263,9 @@ define <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v3f64_v3i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
@@ -375,14 +333,9 @@ define <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v5f64_v5i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
 ; CHECK-GI-NEXT:    fcvtzs v3.2d, v4.2d
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
@@ -405,14 +358,9 @@ define <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-GI-NEXT:    mov d4, v1.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d1
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    fmov x4, d2
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
-; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
-; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
 ; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
@@ -431,15 +379,9 @@ define <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v6f64_v6i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
 ; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
 ; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI13_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
@@ -461,19 +403,13 @@ define <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-GI-NEXT:    bif v2.16b, v3.16b, v6.16b
 ; CHECK-GI-NEXT:    mov d3, v0.d[1]
 ; CHECK-GI-NEXT:    mov d4, v1.d[1]
-; CHECK-GI-NEXT:    mov d5, v2.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    mov d5, v2.d[1]
 ; CHECK-GI-NEXT:    fmov x2, d1
 ; CHECK-GI-NEXT:    fmov x4, d2
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
-; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
 ; CHECK-GI-NEXT:    fmov x5, d5
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
-; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
-; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
 ; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
@@ -565,7 +501,6 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f)
@@ -634,7 +569,6 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w22
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #112
 ; CHECK-SD-NEXT:    ret
 ;
@@ -714,7 +648,6 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f)
@@ -1190,14 +1123,12 @@ define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
 ; CHECK-GI-CVT-NEXT:    fcvt s0, h0
 ; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
 ; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-CVT-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
@@ -1208,7 +1139,6 @@ define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f16_v2i32:
@@ -1510,7 +1440,6 @@ define <2 x i32> @test_signed_v2f32_v2i32_duplicate(<2 x float> %f) {
 define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v2f32_v2i50:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
 ; CHECK-SD-NEXT:    fcvtzs x10, s0
@@ -1573,9 +1502,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -48
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1631,10 +1558,8 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -48
 ; CHECK-GI-NEXT:    .cfi_offset b9, -56
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1693,9 +1618,7 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -48
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1751,10 +1674,8 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -48
 ; CHECK-GI-NEXT:    .cfi_offset b9, -56
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1981,7 +1902,6 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2011,7 +1931,6 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-SD-NEXT:    fcmp s8, s8
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
 ; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-SD-NEXT:    bl __fixsfti
@@ -2083,7 +2002,6 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[1]
 ; CHECK-GI-NEXT:    mov s10, v0.s[2]
 ; CHECK-GI-NEXT:    mov s8, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v11.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2178,7 +2096,6 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2208,7 +2125,6 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-SD-NEXT:    fcmp s8, s8
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
 ; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-SD-NEXT:    bl __fixsfti
@@ -2280,7 +2196,6 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[1]
 ; CHECK-GI-NEXT:    mov s10, v0.s[2]
 ; CHECK-GI-NEXT:    mov s8, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v11.2s, #255, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2376,7 +2291,6 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csinv w9, w9, wzr, ge
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i1:
@@ -2411,7 +2325,6 @@ define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i8:
@@ -2449,7 +2362,6 @@ define <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i13:
@@ -2487,7 +2399,6 @@ define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i16:
@@ -2525,7 +2436,6 @@ define <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i19:
@@ -2553,7 +2463,6 @@ define <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzs w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i32_duplicate:
@@ -2637,7 +2546,6 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2696,7 +2604,6 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2757,7 +2664,6 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2816,7 +2722,6 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -3063,7 +2968,6 @@ define <4 x i32> @test_signed_v4f16_v4i32_duplicate(<4 x half> %f) {
 define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    fcvt s2, h0
 ; CHECK-SD-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3097,7 +3001,6 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x9, h0
 ; CHECK-SD-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3127,7 +3030,6 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    fcvt s2, h0
 ; CHECK-GI-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3161,7 +3063,6 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
 ; CHECK-GI-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3195,7 +3096,6 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
@@ -3215,7 +3115,6 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -3240,7 +3139,6 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -3281,7 +3179,6 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -80
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -3360,7 +3257,6 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i100:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -3380,7 +3276,6 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i100:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -3421,7 +3316,6 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -80
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -3500,7 +3394,6 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i128:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -3520,7 +3413,6 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i128:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -5115,111 +5007,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    csel w12, w13, w8, lt
 ; CHECK-SD-NEXT:    mov v0.s[1], w11
+; CHECK-SD-NEXT:    csel w12, w13, w8, lt
 ; CHECK-SD-NEXT:    fcvtzs w11, d1
 ; CHECK-SD-NEXT:    cmn w12, #128
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    fmov s1, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d2
 ; CHECK-SD-NEXT:    mov d2, v3.d[1]
-; CHECK-SD-NEXT:    cmp w11, #127
 ; CHECK-SD-NEXT:    mov w13, v0.s[1]
+; CHECK-SD-NEXT:    cmp w11, #127
 ; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    fcvtzs w11, d2
+; CHECK-SD-NEXT:    mov d2, v4.d[1]
+; CHECK-SD-NEXT:    mov v0.b[1], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[1], w13
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
 ; CHECK-SD-NEXT:    mov w13, v1.s[1]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s2, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d3
-; CHECK-SD-NEXT:    mov d3, v4.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT:    mov v2.s[1], w10
+; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d3
+; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d3
+; CHECK-SD-NEXT:    mov v0.b[3], w13
+; CHECK-SD-NEXT:    mov d2, v5.d[1]
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    mov v0.b[3], w13
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v2.s[1]
+; CHECK-SD-NEXT:    mov w13, v1.s[1]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
+; CHECK-SD-NEXT:    mov v0.b[4], v1.b[0]
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s3, w12
+; CHECK-SD-NEXT:    fmov s1, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
-; CHECK-SD-NEXT:    mov d4, v5.d[1]
-; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    mov v0.b[5], w13
+; CHECK-SD-NEXT:    cmn w10, #128
+; CHECK-SD-NEXT:    fcvtzs w11, d2
+; CHECK-SD-NEXT:    mov d2, v6.d[1]
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w11, d4
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v3.s[1]
+; CHECK-SD-NEXT:    mov w13, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[6], v1.b[0]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    fcvtzs w12, d5
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
-; CHECK-SD-NEXT:    mov v4.s[1], w10
-; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    fcvtzs w12, d5
 ; CHECK-SD-NEXT:    mov v0.b[7], w13
+; CHECK-SD-NEXT:    fcvtzs w13, d2
+; CHECK-SD-NEXT:    mov d2, v7.d[1]
+; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    csel w10, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d5
 ; CHECK-SD-NEXT:    csel w11, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov w12, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
+; CHECK-SD-NEXT:    mov w12, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v1.b[0]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s5, w11
-; CHECK-SD-NEXT:    fcvtzs w11, d6
 ; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov d6, v7.d[1]
+; CHECK-SD-NEXT:    fmov s1, w11
+; CHECK-SD-NEXT:    fcvtzs w11, d6
 ; CHECK-SD-NEXT:    mov v0.b[9], w12
-; CHECK-SD-NEXT:    mov v5.s[1], w10
+; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w13, w8, lt
+; CHECK-SD-NEXT:    fcvtzs w13, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fcvtzs w13, d6
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
-; CHECK-SD-NEXT:    mov w12, v5.s[1]
+; CHECK-SD-NEXT:    mov v0.b[10], v1.b[0]
+; CHECK-SD-NEXT:    mov w12, v1.s[1]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    fmov s6, w11
-; CHECK-SD-NEXT:    fcvtzs w11, d7
 ; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    fmov s1, w11
+; CHECK-SD-NEXT:    fcvtzs w11, d7
 ; CHECK-SD-NEXT:    mov v0.b[11], w12
-; CHECK-SD-NEXT:    mov v6.s[1], w10
+; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w13, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
 ; CHECK-SD-NEXT:    csel w8, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #128
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
+; CHECK-SD-NEXT:    mov v0.b[12], v1.b[0]
+; CHECK-SD-NEXT:    mov w11, v1.s[1]
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s7, w8
+; CHECK-SD-NEXT:    fmov s1, w8
 ; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v7.s[1], w10
-; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
-; CHECK-SD-NEXT:    mov w8, v7.s[1]
+; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov v0.b[14], v1.b[0]
+; CHECK-SD-NEXT:    mov w8, v1.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5411,26 +5303,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w13, w9
 ; CHECK-SD-NEXT:    csel w11, w13, w9, lt
 ; CHECK-SD-NEXT:    fcvtzs w13, d3
+; CHECK-SD-NEXT:    fmov s3, w12
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w11, w11, w8, gt
 ; CHECK-SD-NEXT:    cmp w14, w9
 ; CHECK-SD-NEXT:    csel w14, w14, w9, lt
+; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w14, w14, w8, gt
 ; CHECK-SD-NEXT:    cmp w13, w9
 ; CHECK-SD-NEXT:    csel w13, w13, w9, lt
+; CHECK-SD-NEXT:    fmov s2, w14
 ; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w13, w13, w8, gt
 ; CHECK-SD-NEXT:    cmp w15, w9
 ; CHECK-SD-NEXT:    csel w15, w15, w9, lt
+; CHECK-SD-NEXT:    mov v2.s[1], w11
 ; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w16, w15, w8, gt
 ; CHECK-SD-NEXT:    cmp w17, w9
 ; CHECK-SD-NEXT:    csel w15, w17, w9, lt
+; CHECK-SD-NEXT:    fmov s1, w16
 ; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w15, w15, w8, gt
 ; CHECK-SD-NEXT:    cmp w18, w9
 ; CHECK-SD-NEXT:    csel w17, w18, w9, lt
+; CHECK-SD-NEXT:    mov v1.s[1], w13
 ; CHECK-SD-NEXT:    cmn w17, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w17, w17, w8, gt
 ; CHECK-SD-NEXT:    cmp w0, w9
@@ -5453,38 +5351,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w2, w9
 ; CHECK-SD-NEXT:    fcvtzs w5, d0
 ; CHECK-SD-NEXT:    csel w2, w2, w9, lt
-; CHECK-SD-NEXT:    fmov s3, w12
+; CHECK-SD-NEXT:    fmov s0, w17
 ; CHECK-SD-NEXT:    mov v7.s[1], w18
 ; CHECK-SD-NEXT:    cmn w2, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w2, w2, w8, gt
 ; CHECK-SD-NEXT:    cmp w3, w9
 ; CHECK-SD-NEXT:    csel w3, w3, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    fmov s6, w2
+; CHECK-SD-NEXT:    mov v0.s[1], w15
 ; CHECK-SD-NEXT:    cmn w3, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    fmov s2, w14
 ; CHECK-SD-NEXT:    csel w3, w3, w8, gt
 ; CHECK-SD-NEXT:    cmp w4, w9
 ; CHECK-SD-NEXT:    csel w4, w4, w9, lt
 ; CHECK-SD-NEXT:    mov v6.s[1], w0
 ; CHECK-SD-NEXT:    cmn w4, #8, lsl #12 // =32768
-; CHECK-SD-NEXT:    mov v2.s[1], w11
 ; CHECK-SD-NEXT:    csel w12, w4, w8, gt
 ; CHECK-SD-NEXT:    cmp w5, w9
-; CHECK-SD-NEXT:    fmov s1, w16
 ; CHECK-SD-NEXT:    csel w10, w5, w9, lt
 ; CHECK-SD-NEXT:    fmov s5, w12
 ; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w10, w10, w8, gt
 ; CHECK-SD-NEXT:    cmp w6, w9
-; CHECK-SD-NEXT:    mov v1.s[1], w13
 ; CHECK-SD-NEXT:    csel w9, w6, w9, lt
 ; CHECK-SD-NEXT:    mov v5.s[1], w3
-; CHECK-SD-NEXT:    fmov s0, w17
 ; CHECK-SD-NEXT:    cmn w9, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w8, w9, w8, gt
 ; CHECK-SD-NEXT:    fmov s4, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w15
 ; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
 ; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI85_0]
 ; CHECK-SD-NEXT:    mov v4.s[1], w10

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index e1670ad2dc053b..45133667df6ddc 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -32,7 +32,6 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzu w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -68,11 +67,6 @@ define <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
 define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v5f32_v5i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    fcvtzu v4.4s, v4.4s
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -87,11 +81,6 @@ define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v5f32_v5i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.4s, v4.4s
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -113,12 +102,6 @@ define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v6f32_v6i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -135,12 +118,6 @@ define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v6f32_v6i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -165,13 +142,6 @@ define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v7f32_v7i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -190,13 +160,6 @@ define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v7f32_v7i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
-; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -253,7 +216,6 @@ define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzu w8, d0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
@@ -267,7 +229,6 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32:
@@ -297,9 +258,6 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v3f64_v3i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    movi v2.2d, #0x000000ffffffff
@@ -356,11 +314,6 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v5f64_v5i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
@@ -377,14 +330,9 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-GI-NEXT:    mov d4, v2.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d2
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    fmov x4, d1
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
-; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
-; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
 ; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
@@ -403,12 +351,6 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v6f64_v6i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
@@ -428,15 +370,9 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d2
 ; CHECK-GI-NEXT:    fmov x4, d1
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
-; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
 ; CHECK-GI-NEXT:    fmov x5, d5
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
-; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
-; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
 ; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
@@ -514,7 +450,6 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f)
@@ -567,7 +502,6 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #96
 ; CHECK-SD-NEXT:    ret
 ;
@@ -633,7 +567,6 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
@@ -1021,14 +954,12 @@ define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
 ; CHECK-GI-CVT-NEXT:    fcvt s0, h0
 ; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
 ; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-CVT-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
@@ -1039,7 +970,6 @@ define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32:
@@ -1286,7 +1216,6 @@ define <2 x i32> @test_unsigned_v2f32_v2i32_duplicate(<2 x float> %f) {
 define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v2f32_v2i50:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    fcvtzu x9, s0
 ; CHECK-SD-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
@@ -1336,9 +1265,7 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1380,10 +1307,8 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -32
 ; CHECK-GI-NEXT:    .cfi_offset b8, -40
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1427,9 +1352,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1469,10 +1392,8 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -32
 ; CHECK-GI-NEXT:    .cfi_offset b8, -40
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1655,7 +1576,6 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1679,7 +1599,6 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    csel x21, x25, x9, gt
 ; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1737,7 +1656,6 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1811,7 +1729,6 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1834,7 +1751,6 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    csinv x21, x9, xzr, le
 ; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1891,7 +1807,6 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1970,7 +1885,6 @@ define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csinc w9, w9, wzr, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i1:
@@ -1999,7 +1913,6 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i8:
@@ -2027,7 +1940,6 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i13:
@@ -2056,7 +1968,6 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i16:
@@ -2084,7 +1995,6 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i19:
@@ -2108,7 +2018,6 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32_duplicate:
@@ -2174,7 +2083,6 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
@@ -2218,7 +2126,6 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
@@ -2263,7 +2170,6 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
@@ -2305,7 +2211,6 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
@@ -2512,7 +2417,6 @@ define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
 define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2537,7 +2441,6 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2558,7 +2461,6 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2583,7 +2485,6 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2608,7 +2509,6 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-SD-CVT:       // %bb.0:
-; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
@@ -2628,7 +2528,6 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -2653,7 +2552,6 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -2690,7 +2588,6 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -64
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s8, h1
@@ -2753,7 +2650,6 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i100:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -2773,7 +2669,6 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i100:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -2809,7 +2704,6 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -64
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -2871,7 +2765,6 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i128:
 ; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -2891,7 +2784,6 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i128:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -4161,8 +4053,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w12, d2
 ; CHECK-SD-NEXT:    fcvtzu w14, d1
 ; CHECK-SD-NEXT:    fcvtzu w8, d4
-; CHECK-SD-NEXT:    mov d4, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w10, d5
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w13, d3
 ; CHECK-SD-NEXT:    cmp w8, #255
 ; CHECK-SD-NEXT:    fcvtzu w15, d4
@@ -4170,29 +4062,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w11, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    fmov s3, w9
 ; CHECK-SD-NEXT:    csel w9, w10, w11, lo
 ; CHECK-SD-NEXT:    cmp w12, #255
 ; CHECK-SD-NEXT:    fcvtzu w10, d0
-; CHECK-SD-NEXT:    mov v4.s[1], w8
+; CHECK-SD-NEXT:    mov v3.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w12, w11, lo
 ; CHECK-SD-NEXT:    cmp w13, #255
-; CHECK-SD-NEXT:    fmov s3, w8
+; CHECK-SD-NEXT:    fmov s2, w8
 ; CHECK-SD-NEXT:    csel w8, w13, w11, lo
 ; CHECK-SD-NEXT:    cmp w14, #255
-; CHECK-SD-NEXT:    mov v3.s[1], w9
+; CHECK-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w14, w11, lo
 ; CHECK-SD-NEXT:    cmp w15, #255
-; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    fmov s1, w9
 ; CHECK-SD-NEXT:    csel w9, w15, w11, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v2.s[1], w8
+; CHECK-SD-NEXT:    mov v1.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w10, w11, lo
-; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v1.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
+; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8:
@@ -4233,29 +4125,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s0, w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d16
-; CHECK-SD-NEXT:    mov d16, v2.d[1]
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    mov w11, v0.s[1]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s1, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d16
-; CHECK-SD-NEXT:    mov d16, v3.d[1]
+; CHECK-SD-NEXT:    fmov s16, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov d1, v3.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[1], w11
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov v16.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d2
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v1.s[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT:    mov w11, v16.s[1]
+; CHECK-SD-NEXT:    mov v0.b[2], v16.b[0]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s2, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d16
-; CHECK-SD-NEXT:    mov d16, v4.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d1
+; CHECK-SD-NEXT:    mov d1, v4.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[3], w11
 ; CHECK-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d3
@@ -4265,58 +4157,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov w11, v2.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s3, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d16
-; CHECK-SD-NEXT:    mov d16, v5.d[1]
+; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov d1, v5.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[5], w11
-; CHECK-SD-NEXT:    mov v3.s[1], w10
+; CHECK-SD-NEXT:    mov v2.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d4
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v3.s[1]
-; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT:    mov w11, v2.s[1]
+; CHECK-SD-NEXT:    mov v0.b[6], v2.b[0]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s4, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    fmov s2, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d1
+; CHECK-SD-NEXT:    mov d1, v6.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[7], w11
-; CHECK-SD-NEXT:    mov v4.s[1], w9
+; CHECK-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d5
-; CHECK-SD-NEXT:    mov d5, v6.d[1]
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov w11, v4.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
+; CHECK-SD-NEXT:    mov w11, v2.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v2.b[0]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s16, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d5
-; CHECK-SD-NEXT:    mov d5, v7.d[1]
+; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov d1, v7.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[9], w11
-; CHECK-SD-NEXT:    mov v16.s[1], w10
+; CHECK-SD-NEXT:    mov v2.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d6
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v0.b[10], v16.b[0]
-; CHECK-SD-NEXT:    mov w11, v16.s[1]
+; CHECK-SD-NEXT:    mov v0.b[10], v2.b[0]
+; CHECK-SD-NEXT:    mov w11, v2.s[1]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s6, w10
+; CHECK-SD-NEXT:    fmov s2, w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d7
 ; CHECK-SD-NEXT:    mov v0.b[11], w11
-; CHECK-SD-NEXT:    mov v6.s[1], w9
-; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov v2.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
-; CHECK-SD-NEXT:    mov w11, v6.s[1]
+; CHECK-SD-NEXT:    mov v0.b[12], v2.b[0]
+; CHECK-SD-NEXT:    mov w11, v2.s[1]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w8, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s5, w8
+; CHECK-SD-NEXT:    fmov s1, w8
 ; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v5.s[1], w9
-; CHECK-SD-NEXT:    mov v0.b[14], v5.b[0]
-; CHECK-SD-NEXT:    mov w8, v5.s[1]
+; CHECK-SD-NEXT:    mov v1.s[1], w9
+; CHECK-SD-NEXT:    mov v0.b[14], v1.b[0]
+; CHECK-SD-NEXT:    mov w8, v1.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4370,8 +4262,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w12, d2
 ; CHECK-SD-NEXT:    fcvtzu w14, d1
 ; CHECK-SD-NEXT:    fcvtzu w8, d4
-; CHECK-SD-NEXT:    mov d4, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w11, d5
+; CHECK-SD-NEXT:    mov d4, v0.d[1]
 ; CHECK-SD-NEXT:    fcvtzu w13, d3
 ; CHECK-SD-NEXT:    cmp w8, w10
 ; CHECK-SD-NEXT:    fcvtzu w15, d4
@@ -4379,29 +4271,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w9, w10
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    fmov s3, w9
 ; CHECK-SD-NEXT:    csel w9, w11, w10, lo
 ; CHECK-SD-NEXT:    cmp w12, w10
 ; CHECK-SD-NEXT:    fcvtzu w11, d0
-; CHECK-SD-NEXT:    mov v4.s[1], w8
+; CHECK-SD-NEXT:    mov v3.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w12, w10, lo
 ; CHECK-SD-NEXT:    cmp w13, w10
-; CHECK-SD-NEXT:    fmov s3, w8
+; CHECK-SD-NEXT:    fmov s2, w8
 ; CHECK-SD-NEXT:    csel w8, w13, w10, lo
 ; CHECK-SD-NEXT:    cmp w14, w10
-; CHECK-SD-NEXT:    mov v3.s[1], w9
+; CHECK-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w14, w10, lo
 ; CHECK-SD-NEXT:    cmp w15, w10
-; CHECK-SD-NEXT:    fmov s2, w9
+; CHECK-SD-NEXT:    fmov s1, w9
 ; CHECK-SD-NEXT:    csel w9, w15, w10, lo
 ; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    mov v2.s[1], w8
+; CHECK-SD-NEXT:    mov v1.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w11, w10, lo
-; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
-; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT:    mov v1.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
+; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16:
@@ -4460,53 +4352,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w16, d0
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lo
 ; CHECK-SD-NEXT:    cmp w17, w8
+; CHECK-SD-NEXT:    fmov s18, w11
 ; CHECK-SD-NEXT:    mov v19.s[1], w13
 ; CHECK-SD-NEXT:    csel w13, w17, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, w8
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fmov s18, w11
+; CHECK-SD-NEXT:    fcvtzu w17, d2
 ; CHECK-SD-NEXT:    csel w11, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w12, w8
-; CHECK-SD-NEXT:    fcvtzu w17, d2
+; CHECK-SD-NEXT:    mov v18.s[1], w9
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lo
 ; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fcvtzu w18, d6
-; CHECK-SD-NEXT:    mov v18.s[1], w9
+; CHECK-SD-NEXT:    fmov s17, w10
 ; CHECK-SD-NEXT:    csel w9, w16, w8, lo
 ; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fmov s17, w10
-; CHECK-SD-NEXT:    csel w10, w14, w8, lo
 ; CHECK-SD-NEXT:    fcvtzu w16, d5
-; CHECK-SD-NEXT:    fmov s23, w10
+; CHECK-SD-NEXT:    csel w10, w14, w8, lo
+; CHECK-SD-NEXT:    fcvtzu w18, d6
 ; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    fcvtzu w14, d3
+; CHECK-SD-NEXT:    fmov s5, w10
 ; CHECK-SD-NEXT:    csel w10, w17, w8, lo
 ; CHECK-SD-NEXT:    cmp w15, w8
+; CHECK-SD-NEXT:    fcvtzu w14, d3
 ; CHECK-SD-NEXT:    fcvtzu w17, d4
+; CHECK-SD-NEXT:    fmov s16, w12
 ; CHECK-SD-NEXT:    mov v17.s[1], w13
-; CHECK-SD-NEXT:    mov v23.s[1], w9
+; CHECK-SD-NEXT:    mov v5.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w15, w8, lo
 ; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fmov s22, w9
+; CHECK-SD-NEXT:    fmov s4, w9
 ; CHECK-SD-NEXT:    csel w9, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fmov s16, w12
-; CHECK-SD-NEXT:    mov v22.s[1], w10
+; CHECK-SD-NEXT:    mov v16.s[1], w11
+; CHECK-SD-NEXT:    mov v4.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w16, w8, lo
 ; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fmov s21, w10
+; CHECK-SD-NEXT:    fmov s3, w10
 ; CHECK-SD-NEXT:    csel w10, w14, w8, lo
 ; CHECK-SD-NEXT:    cmp w17, w8
 ; CHECK-SD-NEXT:    csel w8, w17, w8, lo
-; CHECK-SD-NEXT:    mov v16.s[1], w11
-; CHECK-SD-NEXT:    mov v21.s[1], w9
-; CHECK-SD-NEXT:    fmov s20, w8
+; CHECK-SD-NEXT:    fmov s2, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
+; CHECK-SD-NEXT:    mov v3.s[1], w9
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT:    mov v20.s[1], w10
+; CHECK-SD-NEXT:    mov v2.s[1], w10
 ; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16:

diff  --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index 2187717c4148ae..84c008d34d5f9b 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -97,15 +97,12 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfhf2
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfhf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -120,11 +117,9 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfhf2
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
@@ -133,7 +128,6 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -152,15 +146,12 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfsf2
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfsf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -172,12 +163,10 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfsf2
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfsf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    fmov d0, d1
@@ -199,12 +188,10 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfdf2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfdf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -218,12 +205,10 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfdf2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfdf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
@@ -247,9 +232,6 @@ entry:
 define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptrunc_v3f64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-SD-NEXT:    fcvtn2 v0.4s, v2.2d
@@ -257,10 +239,8 @@ define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptrunc_v3f64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    fcvt s2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fcvt s2, d2
 ; CHECK-GI-NEXT:    fcvtn v1.2s, v0.2d
 ; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
@@ -295,7 +275,6 @@ define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h0, d0
 ; CHECK-GI-NEXT:    fcvt h1, d1
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <2 x double> %a to <2 x half>
@@ -310,7 +289,6 @@ define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NEXT:    fcvt h1, d2
 ; CHECK-SD-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptrunc_v3f64_v3f16:
@@ -320,7 +298,6 @@ define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h2, d2
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <3 x double> %a to <3 x half>
@@ -346,7 +323,6 @@ define <4 x half> @fptrunc_v4f64_v4f16(<4 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h2, d3
 ; CHECK-GI-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <4 x double> %a to <4 x half>
@@ -356,13 +332,11 @@ entry:
 define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
 ; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    fcvtn v0.4h, v1.4s

diff  --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index 6efd9f40f00688..eb8f961b369943 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -243,9 +243,8 @@ define <3 x i64> @freeze_v3i64() {
 ; CHECK-SD-LABEL: freeze_v3i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v0.2d
-; CHECK-SD-NEXT:    fmov d2, d0
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d2, d0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: freeze_v3i64:
@@ -254,7 +253,6 @@ define <3 x i64> @freeze_v3i64() {
 ; CHECK-GI-NEXT:    add x8, x8, x8
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %y1 = freeze <3 x i64> undef
   %t1 = add <3 x i64> %y1, %y1
@@ -297,10 +295,8 @@ define <3 x ptr> @freeze_v3p0() {
 ; CHECK-SD-NEXT:    mov w8, #4 // =0x4
 ; CHECK-SD-NEXT:    dup v2.2d, x8
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT:    add d2, d0, d2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    add d2, d0, d2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: freeze_v3p0:

diff  --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 98276b68481a10..29a86fd5b27b80 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -6,14 +6,12 @@ define float @frem2(float %x) {
 ; CHECK-SD-LABEL: frem2:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s1, #0.50000000
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    fmov s2, #-2.00000000
 ; CHECK-SD-NEXT:    fmul s1, s0, s1
 ; CHECK-SD-NEXT:    frintz s1, s1
 ; CHECK-SD-NEXT:    fmadd s1, s1, s2, s0
 ; CHECK-SD-NEXT:    mvni v2.4s, #128, lsl #24
 ; CHECK-SD-NEXT:    bit v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem2:
@@ -366,25 +364,20 @@ define <4 x float> @frem2_vec(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -425,25 +418,20 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, #2.00000000
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -491,25 +479,20 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll
index feb13da64cbf8a..a08178dada813b 100644
--- a/llvm/test/CodeGen/AArch64/frem.ll
+++ b/llvm/test/CodeGen/AArch64/frem.ll
@@ -73,14 +73,10 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #64
@@ -97,16 +93,12 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d9
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d9
 ; CHECK-GI-NEXT:    bl fmod
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -207,25 +199,18 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl fmod
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp, #16] // 32-byte Folded Reload
@@ -253,35 +238,28 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d10, v2.d[1]
 ; CHECK-GI-NEXT:    mov d11, v3.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d10
 ; CHECK-GI-NEXT:    fmov d0, d8
+; CHECK-GI-NEXT:    fmov d1, d10
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d1, d11
 ; CHECK-GI-NEXT:    fmov d0, d9
+; CHECK-GI-NEXT:    fmov d1, d11
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
@@ -297,23 +275,16 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -326,20 +297,14 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s9
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s9
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -362,14 +327,10 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -377,7 +338,6 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -398,23 +358,18 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s10, v1.s[1]
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s10
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s10
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -439,14 +394,10 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -454,7 +405,6 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -462,7 +412,6 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -488,28 +437,22 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s11
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s12
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s12
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
@@ -538,14 +481,10 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -553,7 +492,6 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -561,21 +499,16 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -583,7 +516,6 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -621,7 +553,6 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v3.s[1]
 ; CHECK-GI-NEXT:    mov s11, v3.s[2]
 ; CHECK-GI-NEXT:    mov s2, v4.s[1]
@@ -630,50 +561,39 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s14, v1.s[1]
 ; CHECK-GI-NEXT:    mov s15, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    str s2, [sp, #48] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v4.s[2]
 ; CHECK-GI-NEXT:    str s2, [sp, #112] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s14
 ; CHECK-GI-NEXT:    fmov s0, s8
+; CHECK-GI-NEXT:    fmov s1, s14
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s15
 ; CHECK-GI-NEXT:    fmov s0, s9
+; CHECK-GI-NEXT:    fmov s1, s15
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    fmov s0, s10
+; CHECK-GI-NEXT:    fmov s1, s13
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s12
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s11
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
@@ -895,8 +815,6 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
@@ -936,7 +854,6 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -955,8 +872,6 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    .cfi_offset b12, -56
 ; CHECK-GI-NEXT:    .cfi_offset b13, -64
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -994,8 +909,7 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1559,22 +1473,23 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) {
 ; CHECK-GI-NEXT:    fcvt s1, h0
 ; CHECK-GI-NEXT:    fmov s0, s2
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    ldr q3, [sp, #304] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
+; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #320] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #128] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll
index 2afc56a7139fbf..57e83c04dac4c5 100644
--- a/llvm/test/CodeGen/AArch64/fsincos.ll
+++ b/llvm/test/CodeGen/AArch64/fsincos.ll
@@ -68,13 +68,10 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -89,14 +86,11 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -178,22 +172,17 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sin
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -214,28 +203,23 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -251,20 +235,15 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -276,16 +255,12 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -307,20 +282,16 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -338,18 +309,14 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -372,27 +339,22 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -413,22 +375,17 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -454,46 +411,37 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -527,41 +475,31 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -740,7 +678,6 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -771,7 +708,6 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -786,7 +722,6 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -816,8 +751,7 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1240,19 +1174,20 @@ define <16 x half> @sin_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -1370,13 +1305,10 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1391,14 +1323,11 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1480,22 +1409,17 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cos
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -1516,28 +1440,23 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    fmov d2, d1
+; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -1553,20 +1472,15 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1578,16 +1492,12 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -1609,20 +1519,16 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1640,18 +1546,14 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -1674,27 +1576,22 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1715,22 +1612,17 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -1756,46 +1648,37 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -1829,41 +1712,31 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -2042,7 +1915,6 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -2073,7 +1945,6 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2088,7 +1959,6 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -2118,8 +1988,7 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2542,19 +2411,20 @@ define <16 x half> @cos_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
-; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fsqrt.ll b/llvm/test/CodeGen/AArch64/fsqrt.ll
index 6c5fd8e52b017c..466f1a14c29c46 100644
--- a/llvm/test/CodeGen/AArch64/fsqrt.ll
+++ b/llvm/test/CodeGen/AArch64/fsqrt.ll
@@ -66,27 +66,18 @@ entry:
 define <3 x double> @sqrt_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: sqrt_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fsqrt v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fsqrt v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sqrt_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    fsqrt d2, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    fsqrt d2, d2
 ; CHECK-GI-NEXT:    fsqrt v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %a)
@@ -225,7 +216,6 @@ entry:
 define <4 x half> @sqrt_v4f16(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: sqrt_v4f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
@@ -244,7 +234,6 @@ define <4 x half> @sqrt_v4f16(<4 x half> %a) {
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-SD-NOFP16-NEXT:    fcvt h1, s4
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: sqrt_v4f16:

diff  --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 3037a9552bc27e..e5aa360f804c1f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -21,7 +21,6 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshl_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w1, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w0, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -266,7 +265,6 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshr_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9
@@ -667,7 +665,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_shl_fshl_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w1, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -696,7 +693,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_lshr_fshr_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9

diff  --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 2d84a69f3144e3..bf68b6bc7eaa96 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -124,9 +124,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE-NEXT:    fmov s0, w8
 ; CHECK-SVE-NEXT:    fmov s1, w9
 ; CHECK-SVE-NEXT:    mov v0.s[1], w10
-; CHECK-SVE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE-NEXT:    mov v1.s[1], w11
-; CHECK-SVE-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SVE-NEXT:    b use
 ;
 ; CHECK-SVE2p1-LABEL: test_fixed_extract:
@@ -140,9 +138,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT:    fmov s0, w8
 ; CHECK-SVE2p1-NEXT:    fmov s1, w9
 ; CHECK-SVE2p1-NEXT:    mov v0.s[1], w10
-; CHECK-SVE2p1-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE2p1-NEXT:    mov v1.s[1], w11
-; CHECK-SVE2p1-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SVE2p1-NEXT:    b use
     %r = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %i, i64 %n)
     %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv8i1.i64(<vscale x 8 x i1> %r, i64 0)

diff  --git a/llvm/test/CodeGen/AArch64/get_vector_length.ll b/llvm/test/CodeGen/AArch64/get_vector_length.ll
index 3a83edb339b848..73dc32dfe4721f 100644
--- a/llvm/test/CodeGen/AArch64/get_vector_length.ll
+++ b/llvm/test/CodeGen/AArch64/get_vector_length.ll
@@ -33,7 +33,6 @@ define i32 @vector_length_i64(i64 %tc) {
 ; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    cmp x0, x8
 ; CHECK-NEXT:    csel x0, x0, x8, lo
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %a = call i32 @llvm.experimental.get.vector.length.i64(i64 %tc, i32 2, i1 true)
   ret i32 %a

diff  --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll
index bb802033e05fc6..032b659f30b1b1 100644
--- a/llvm/test/CodeGen/AArch64/half.ll
+++ b/llvm/test/CodeGen/AArch64/half.ll
@@ -25,7 +25,6 @@ define i16 @test_bitcast_from_half(ptr %addr) {
 define i16 @test_reg_bitcast_from_half(half %in) {
 ; CHECK-LABEL: test_reg_bitcast_from_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %val = bitcast half %in to i16
@@ -46,7 +45,6 @@ define half @test_reg_bitcast_to_half(i16 %in) {
 ; CHECK-LABEL: test_reg_bitcast_to_half:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %val = bitcast i16 %in to half
   ret half %val

diff  --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index cb9f04a7fac48a..c3fdc7db2abbee 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -15,7 +15,6 @@
 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -29,7 +28,6 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -43,7 +41,6 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -59,7 +56,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -73,7 +69,6 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -87,7 +82,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -268,7 +262,6 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
 ; CHECK-NEXT:    ret
@@ -325,7 +318,6 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #128 // =0x80
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    cmp w8, #1

diff  --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 32a62453202f40..4a73b10811d293 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -16,7 +16,6 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -31,7 +30,6 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -46,7 +44,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -63,7 +60,6 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -78,7 +74,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -93,7 +88,6 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -272,7 +266,6 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w0, w8, #7
 ; CHECK-NEXT:    ret
@@ -289,7 +282,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #43605
+; CHECK-NEXT:    mov w8, #43605 // =0xaa55
 ; CHECK-NEXT:    movk w8, #43605, lsl #16
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    tst w8, #0x1
@@ -303,8 +296,8 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const2_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
-; CHECK-NEXT:    mov w9, #43605
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    mov w9, #43605 // =0xaa55
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    movk w9, #43605, lsl #16
 ; CHECK-NEXT:    tst w8, w9
@@ -319,8 +312,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_slt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #24
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov w8, #24 // =0x18
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
@@ -334,8 +326,7 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-128
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov w8, #-128 // =0xffffff80
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0x80

diff  --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 61964060ca2c8b..189706f6ed4edd 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -1123,52 +1123,33 @@ entry:
 define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: v3i64_i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    ldr d16, [sp, #24]
-; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    ldr d16, [sp, #24]
 ; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    cmgt v0.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-SD-NEXT:    cmgt v1.2d, v5.2d, v2.2d
 ; CHECK-SD-NEXT:    mov v2.16b, v1.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bsl v2.16b, v17.16b, v16.16b
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v3i64_i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-GI-NEXT:    ldr x8, [sp]
-; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    cmgt v2.2d, v5.2d, v2.2d
+; CHECK-GI-NEXT:    ldr x8, [sp]
 ; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
+; CHECK-GI-NEXT:    cmgt v2.2d, v5.2d, v2.2d
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-GI-NEXT:    fmov x9, d2
+; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    cmgt v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    fmov x9, d2
 ; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
 ; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-GI-NEXT:    and x8, x8, x9
@@ -1176,7 +1157,6 @@ define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64>
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <3 x i64> %a, %b

diff  --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..bf20309b5b938b 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -32,15 +32,15 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: STRXui $x0, %stack.0, 0 :: (store (s64) into %stack.0)
     ; CHECK-NEXT: dead undef [[COPY:%[0-9]+]].sub_32:gpr64 = COPY $x1
-    ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def %6
-    ; CHECK-NEXT: STRXui %6, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def %8
+    ; CHECK-NEXT: STRXui %8, %stack.1, 0 :: (store (s64) into %stack.1)
     ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
     ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
     ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: STRXui [[LDRXui1]], [[LDRXui]], 1 :: (store (s64) into stack + 8)
-    ; CHECK-NEXT: STRXui undef %8:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
+    ; CHECK-NEXT: STRXui undef %9:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
     ; CHECK-NEXT: RET_ReallyLR
     %0:gpr64sp = COPY $x0
     undef %1.sub_32:gpr64 = COPY $x1
@@ -70,15 +70,15 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: STRXui $x0, %stack.0, 0 :: (store (s64) into %stack.0)
     ; CHECK-NEXT: dead undef [[COPY:%[0-9]+]].sub_32:gpr64 = COPY $x1
-    ; CHECK-NEXT: dead undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 4, implicit-def %6
-    ; CHECK-NEXT: STRXui %6, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: dead undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 4, implicit-def %8
+    ; CHECK-NEXT: STRXui %8, %stack.1, 0 :: (store (s64) into %stack.1)
     ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
     ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
     ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: STRXui [[LDRXui1]], [[LDRXui]], 1 :: (store (s64) into stack + 8)
-    ; CHECK-NEXT: STRXui undef %8:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
+    ; CHECK-NEXT: STRXui undef %9:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
     ; CHECK-NEXT: RET_ReallyLR
     %0:gpr64sp = COPY $x0
     undef %1.sub_32:gpr64 = COPY $x1

diff  --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed90524..f98855dfd992b8 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    stp xzr, xzr, [sp]
 ; CHECK-NEXT:    stp x8, xzr, [sp, #16]
 ; CHECK-NEXT:    bl _fprintf
@@ -92,7 +91,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ldr x8, [sp, #40] ; 8-byte Folded Reload
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    ; kill: def $w8 killed $w8 killed $x8 def $x8
 ; CHECK-NEXT:    str x8, [sp]
 ; CHECK-NEXT:    bl _fprintf
 ; CHECK-NEXT:    brk #0x1

diff  --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..a764026d1f81b4 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -51,7 +51,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.9(0x01288b01), %bb.5(0x01288b01), %bb.2(0x11f46a91), %bb.6(0x23e8d524), %bb.7(0x47d1aa49)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead early-clobber %12:gpr64, dead early-clobber %13:gpr64sp = JumpTableDest32 [[DEF8]], [[DEF7]], %jump-table.0
-  ; CHECK-NEXT:   BR undef %18:gpr64
+  ; CHECK-NEXT:   BR undef [[DEF10]]
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   ; CHECK-NEXT:   successors: %bb.8(0x80000000)

diff  --git a/llvm/test/CodeGen/AArch64/insert-extend.ll b/llvm/test/CodeGen/AArch64/insert-extend.ll
index 851fb0d03e8aa3..11d947816b07e0 100644
--- a/llvm/test/CodeGen/AArch64/insert-extend.ll
+++ b/llvm/test/CodeGen/AArch64/insert-extend.ll
@@ -46,8 +46,6 @@ define <8 x i16> @load4_v4i8_zext_add(float %tmp, ptr %a, ptr %b) {
 define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr nocapture noundef readonly %p2, i32 noundef %st2) {
 ; CHECK-LABEL: large:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll
index 6828fa9f1508c8..a12a8c92605980 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll
@@ -47,11 +47,10 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
 define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
 ; CHECK-LABEL: insert_v16i8_4_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
+; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    mov v3.16b, v1.16b
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   ret <16 x i8> %s2
@@ -91,9 +90,7 @@ define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
 ; CHECK-LABEL: insert_v8i8_4_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %s2
@@ -103,9 +100,7 @@ define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
 ; CHECK-LABEL: insert_v8i8_4_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i8> %s2
@@ -146,11 +141,10 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
 define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
 ; CHECK-LABEL: insert_v8i16_2_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
+; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    adrp x8, .LCPI13_0
-; CHECK-NEXT:    mov v3.16b, v1.16b
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i16> %s2
@@ -190,9 +184,7 @@ define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
 ; CHECK-LABEL: insert_v4i16_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x i16> %s2
@@ -202,9 +194,7 @@ define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
 ; CHECK-LABEL: insert_v4i16_2_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i16> %s2
@@ -272,7 +262,6 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
 define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v16i8_4_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI24_0
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
@@ -324,9 +313,7 @@ define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i8_4_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <4 x i8>, ptr %a
   %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -340,7 +327,6 @@ define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr s2, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <4 x i8>, ptr %a
   %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -380,7 +366,6 @@ define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -392,7 +377,6 @@ define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i8_2_15:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    adrp x8, .LCPI33_0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI33_0]
@@ -410,7 +394,6 @@ define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -424,7 +407,6 @@ define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -438,7 +420,6 @@ define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -450,10 +431,8 @@ define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v4i8_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1 v0.8b, v0.8b, v0.8b
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -468,7 +447,6 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    zip1 v2.8b, v0.8b, v0.8b
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -493,7 +471,6 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
 define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i16_2_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI40_0
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI40_0]
@@ -545,9 +522,7 @@ define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
 ; CHECK-LABEL: load_v4i16_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i16>, ptr %a
   %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -561,7 +536,6 @@ define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr s2, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i16>, ptr %a
   %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -630,7 +604,6 @@ define <8 x i8> @load2_v4i8(float %tmp, ptr %a, ptr %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %la = load <4 x i8>, ptr %a
   %lb = load <4 x i8>, ptr %b

diff  --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 54ee693db1239f..8c1840ee788c07 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -5,7 +5,6 @@
 define <2 x double> @insert_v2f64_0(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -16,7 +15,6 @@ entry:
 define <2 x double> @insert_v2f64_1(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f64_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -30,7 +28,6 @@ define <2 x double> @insert_v2f64_c(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str d1, [x8]
@@ -56,13 +53,9 @@ entry:
 define <3 x double> @insert_v3f64_0(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f64_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-SD-NEXT:    mov v0.d[0], v3.d[0]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3f64_0:
@@ -87,10 +80,6 @@ entry:
 define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
@@ -100,8 +89,6 @@ define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-NEXT:    ldr q0, [sp]
 ; CHECK-SD-NEXT:    ldr d2, [sp, #16]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #32
 ; CHECK-SD-NEXT:    ret
 ;
@@ -114,19 +101,14 @@ define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov w8, w0
 ; CHECK-GI-NEXT:    mov x9, sp
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    str d3, [x9, x8, lsl #3]
 ; CHECK-GI-NEXT:    ldp q0, q2, [sp]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    mov sp, x29
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
@@ -138,7 +120,6 @@ entry:
 define <4 x double> @insert_v4f64_0(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[0], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -149,7 +130,6 @@ entry:
 define <4 x double> @insert_v4f64_2(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f64_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[0], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -162,7 +142,6 @@ define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str d2, [x9, x8, lsl #3]
@@ -195,10 +174,7 @@ entry:
 define <2 x float> @insert_v2f32_0(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x float> %a, float %b, i32 0
@@ -208,10 +184,7 @@ entry:
 define <2 x float> @insert_v2f32_1(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x float> %a, float %b, i32 1
@@ -224,7 +197,6 @@ define <2 x float> @insert_v2f32_c(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -252,7 +224,6 @@ entry:
 define <3 x float> @insert_v3f32_0(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v3f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v1.s[2], v0.s[2]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -265,14 +236,12 @@ entry:
 define <3 x float> @insert_v3f32_2(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f32_2:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-SD-NEXT:    mov v0.s[2], v1.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3f32_2:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    mov v2.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -288,7 +257,6 @@ define <3 x float> @insert_v3f32_c(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -314,7 +282,6 @@ entry:
 define <4 x float> @insert_v4f32_0(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -325,7 +292,6 @@ entry:
 define <4 x float> @insert_v4f32_2(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f32_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[2], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -339,7 +305,6 @@ define <4 x float> @insert_v4f32_c(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -365,7 +330,6 @@ entry:
 define <8 x float> @insert_v8f32_0(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[0], v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -376,7 +340,6 @@ entry:
 define <8 x float> @insert_v8f32_2(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f32_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -389,7 +352,6 @@ define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str s2, [x9, x8, lsl #2]
@@ -422,10 +384,7 @@ entry:
 define <4 x half> @insert_v4f16_0(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x half> %a, half %b, i32 0
@@ -435,10 +394,7 @@ entry:
 define <4 x half> @insert_v4f16_2(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x half> %a, half %b, i32 2
@@ -451,7 +407,6 @@ define <4 x half> @insert_v4f16_c(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    str h1, [x8]
@@ -479,7 +434,6 @@ entry:
 define <8 x half> @insert_v8f16_0(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -490,7 +444,6 @@ entry:
 define <8 x half> @insert_v8f16_2(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -504,7 +457,6 @@ define <8 x half> @insert_v8f16_c(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    str h1, [x8]
@@ -530,7 +482,6 @@ entry:
 define <16 x half> @insert_v16f16_0(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v16f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -541,7 +492,6 @@ entry:
 define <16 x half> @insert_v16f16_2(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v16f16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -554,7 +504,6 @@ define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str h2, [x9, x8, lsl #1]
@@ -587,9 +536,7 @@ entry:
 define <8 x i8> @insert_v8i8_0(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-LABEL: insert_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[0], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <8 x i8> %a, i8 %b, i32 0
@@ -599,9 +546,7 @@ entry:
 define <8 x i8> @insert_v8i8_2(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-LABEL: insert_v8i8_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[2], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <8 x i8> %a, i8 %b, i32 2
@@ -614,7 +559,6 @@ define <8 x i8> @insert_v8i8_c(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #3
 ; CHECK-SD-NEXT:    strb w0, [x8]
@@ -667,7 +611,6 @@ define <16 x i8> @insert_v16i8_c(<16 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
 ; CHECK-SD-NEXT:    strb w0, [x8]
@@ -717,7 +660,6 @@ define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x1f
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    strb w0, [x9, x8]
@@ -752,9 +694,7 @@ entry:
 define <4 x i16> @insert_v4i16_0(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-LABEL: insert_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x i16> %a, i16 %b, i32 0
@@ -764,9 +704,7 @@ entry:
 define <4 x i16> @insert_v4i16_2(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-LABEL: insert_v4i16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[2], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x i16> %a, i16 %b, i32 2
@@ -779,7 +717,6 @@ define <4 x i16> @insert_v4i16_c(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #2
 ; CHECK-SD-NEXT:    strh w0, [x8]
@@ -830,7 +767,6 @@ define <8 x i16> @insert_v8i16_c(<8 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-SD-NEXT:    strh w0, [x8]
@@ -878,7 +814,6 @@ define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    strh w0, [x9, x8, lsl #1]
@@ -911,9 +846,7 @@ entry:
 define <2 x i32> @insert_v2i32_0(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: insert_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x i32> %a, i32 %b, i32 0
@@ -923,9 +856,7 @@ entry:
 define <2 x i32> @insert_v2i32_1(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: insert_v2i32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x i32> %a, i32 %b, i32 1
@@ -938,7 +869,6 @@ define <2 x i32> @insert_v2i32_c(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #1
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -1008,7 +938,6 @@ define <3 x i32> @insert_v3i32_c(<3 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -1057,7 +986,6 @@ define <4 x i32> @insert_v4i32_c(<4 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -1105,7 +1033,6 @@ define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str w0, [x9, x8, lsl #2]
@@ -1161,7 +1088,6 @@ define <2 x i64> @insert_v2i64_c(<2 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #3, #1
 ; CHECK-SD-NEXT:    str x0, [x8]
@@ -1187,12 +1113,9 @@ entry:
 define <3 x i64> @insert_v3i64_0(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3i64_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[0], x0
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3i64_0:
@@ -1217,10 +1140,6 @@ entry:
 define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3i64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
@@ -1230,8 +1149,6 @@ define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-NEXT:    ldr q0, [sp]
 ; CHECK-SD-NEXT:    ldr d2, [sp, #16]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #32
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1244,19 +1161,14 @@ define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov w8, w1
 ; CHECK-GI-NEXT:    mov x9, sp
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    str x0, [x9, x8, lsl #3]
 ; CHECK-GI-NEXT:    ldp q0, q2, [sp]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    mov sp, x29
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
@@ -1290,7 +1202,6 @@ define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str x0, [x9, x8, lsl #3]
@@ -1323,7 +1234,6 @@ entry:
 define double @extract_v2f64_0(<2 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v2f64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <2 x double> %a, i32 0
@@ -1346,7 +1256,6 @@ define double @extract_v2f64_c(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -1391,12 +1300,8 @@ entry:
 define double @extract_v3f64_c(<3 x double> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v3f64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    mov x9, sp
@@ -1413,12 +1318,9 @@ define double @extract_v3f64_c(<3 x double> %a, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov w8, w0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x9, sp
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    ldr d0, [x9, x8, lsl #3]
@@ -1433,7 +1335,6 @@ entry:
 define double @extract_v4f64_0(<4 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x double> %a, i32 0
@@ -1444,7 +1345,6 @@ define double @extract_v4f64_2(<4 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f64_2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x double> %a, i32 2
@@ -1456,7 +1356,6 @@ define double @extract_v4f64_c(<4 x double> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr d0, [x9, x8, lsl #3]
@@ -1486,16 +1385,9 @@ entry:
 }
 
 define float @extract_v2f32_0(<2 x float> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2f32_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v2f32_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v2f32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <2 x float> %a, i32 0
   ret float %d
@@ -1504,7 +1396,6 @@ entry:
 define float @extract_v2f32_1(<2 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v2f32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1518,7 +1409,6 @@ define float @extract_v2f32_c(<2 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1544,7 +1434,6 @@ entry:
 define float @extract_v3f32_0(<3 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v3f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <3 x float> %a, i32 0
@@ -1567,7 +1456,6 @@ define float @extract_v3f32_c(<3 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1593,7 +1481,6 @@ entry:
 define float @extract_v4f32_0(<4 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x float> %a, i32 0
@@ -1616,7 +1503,6 @@ define float @extract_v4f32_c(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1642,7 +1528,6 @@ entry:
 define float @extract_v8f32_0(<8 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8f32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <8 x float> %a, i32 0
@@ -1664,7 +1549,6 @@ define float @extract_v8f32_c(<8 x float> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr s0, [x9, x8, lsl #2]
@@ -1694,16 +1578,9 @@ entry:
 }
 
 define half @extract_v4f16_0(<4 x half> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v4f16_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v4f16_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v4f16_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x half> %a, i32 0
   ret half %d
@@ -1712,7 +1589,6 @@ entry:
 define half @extract_v4f16_2(<4 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -1726,7 +1602,6 @@ define half @extract_v4f16_c(<4 x half> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    ldr h0, [x8]
@@ -1752,7 +1627,6 @@ entry:
 define half @extract_v8f16_0(<8 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <8 x half> %a, i32 0
@@ -1775,7 +1649,6 @@ define half @extract_v8f16_c(<8 x half> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldr h0, [x8]
@@ -1801,7 +1674,6 @@ entry:
 define half @extract_v16f16_0(<16 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v16f16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <16 x half> %a, i32 0
@@ -1823,7 +1695,6 @@ define half @extract_v16f16_c(<16 x half> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr h0, [x9, x8, lsl #1]
@@ -1855,7 +1726,6 @@ entry:
 define i8 @extract_v8i8_0(<8 x i8> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1866,7 +1736,6 @@ entry:
 define i8 @extract_v8i8_2(<8 x i8> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8i8_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -1880,7 +1749,6 @@ define i8 @extract_v8i8_c(<8 x i8> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfxil x8, x0, #0, #3
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -1931,7 +1799,6 @@ define i8 @extract_v16i8_c(<16 x i8> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfxil x8, x0, #0, #4
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -1981,7 +1848,6 @@ define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x1f
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldrb w0, [x9, x8]
@@ -2015,7 +1881,6 @@ entry:
 define i16 @extract_v4i16_0(<4 x i16> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2026,7 +1891,6 @@ entry:
 define i16 @extract_v4i16_2(<4 x i16> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -2040,7 +1904,6 @@ define i16 @extract_v4i16_c(<4 x i16> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -2089,7 +1952,6 @@ define i16 @extract_v8i16_c(<8 x i16> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -2137,7 +1999,6 @@ define i16 @extract_v16i16_c(<16 x i16> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldrh w0, [x9, x8, lsl #1]
@@ -2167,16 +2028,10 @@ entry:
 }
 
 define i32 @extract_v2i32_0(<2 x i32> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i32_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov w0, s0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v2i32_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v2i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <2 x i32> %a, i32 0
   ret i32 %d
@@ -2185,13 +2040,11 @@ entry:
 define i32 @extract_v2i32_1(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v2i32_1:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: extract_v2i32_1:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -2206,7 +2059,6 @@ define i32 @extract_v2i32_c(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2261,7 +2113,6 @@ define i32 @extract_v3i32_c(<3 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2316,7 +2167,6 @@ define i32 @extract_v4i32_c(<4 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2370,7 +2220,6 @@ define i32 @extract_v8i32_c(<8 x i32> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr w0, [x9, x8, lsl #2]
@@ -2431,7 +2280,6 @@ define i64 @extract_v2i64_c(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -2455,32 +2303,20 @@ entry:
 }
 
 define i64 @extract_v3i64_0(<3 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v3i64_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v3i64_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v3i64_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <3 x i64> %a, i32 0
   ret i64 %d
 }
 
 define i64 @extract_v3i64_2(<3 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v3i64_2:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    fmov x0, d2
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v3i64_2:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x0, d2
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v3i64_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov x0, d2
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <3 x i64> %a, i32 2
   ret i64 %d
@@ -2489,12 +2325,8 @@ entry:
 define i64 @extract_v3i64_c(<3 x i64> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v3i64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    mov x9, sp
@@ -2511,12 +2343,9 @@ define i64 @extract_v3i64_c(<3 x i64> %a, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov w8, w0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x9, sp
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    ldr x0, [x9, x8, lsl #3]
@@ -2553,7 +2382,6 @@ define i64 @extract_v4i64_c(<4 x i64> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr x0, [x9, x8, lsl #3]

diff  --git a/llvm/test/CodeGen/AArch64/insertshuffleload.ll b/llvm/test/CodeGen/AArch64/insertshuffleload.ll
index b97b9b1dcdcf8d..8c8d392b9a9b31 100644
--- a/llvm/test/CodeGen/AArch64/insertshuffleload.ll
+++ b/llvm/test/CodeGen/AArch64/insertshuffleload.ll
@@ -259,7 +259,6 @@ define <8 x i8> @wrongidx_first(ptr %p) {
 ; CHECK-NEXT:    ldur d0, [x0, #1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[7], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q
@@ -276,7 +275,6 @@ define <8 x i8> @wrong_last(ptr %p) {
 ; CHECK-NEXT:    add x8, x0, #8
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 8
   %l1 = load <8 x i8>, ptr %p
@@ -295,7 +293,6 @@ define <8 x i8> @wrong_shuffle(ptr %p) {
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q
@@ -348,7 +345,6 @@ define <8 x i8> @wrong_offsetfirst(ptr %p) {
 ; CHECK-NEXT:    ldur d0, [x0, #-1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 -1
   %l1 = load <8 x i8>, ptr %q
@@ -365,7 +361,6 @@ define <8 x i8> @wrong_offsetlast(ptr %p) {
 ; CHECK-NEXT:    add x8, x0, #7
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
 ; CHECK-NEXT:    ld1 { v0.b }[7], [x8]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 7
   %l1 = load <8 x i8>, ptr %p
@@ -383,7 +378,6 @@ define <8 x i8> @storebetween(ptr %p, ptr %r) {
 ; CHECK-NEXT:    strb wzr, [x1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q

diff  --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index cdf2a962f93226..698c77dda1388f 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -145,7 +145,6 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0)
   ret i32 %res
@@ -157,7 +156,6 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   ret i32 %res
@@ -179,10 +177,8 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -196,7 +192,6 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0)
   ret i32 %res
@@ -208,7 +203,6 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   ret i32 %res
@@ -230,10 +224,8 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -247,7 +239,6 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0)
   ret i32 %res
@@ -259,7 +250,6 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   ret i32 %res
@@ -281,10 +271,8 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -298,7 +286,6 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
   ret i32 %res
@@ -310,7 +297,6 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   ret i32 %res
@@ -323,7 +309,6 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne <vscale x 16 x i8> %a, %b
   %select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer
@@ -348,10 +333,8 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -371,20 +354,17 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
   ret i32 %res
@@ -400,20 +380,17 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
   ret i32 %res
@@ -433,9 +410,8 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
 ;
 ; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
@@ -457,20 +433,17 @@ define i32 @ctz_v8i1(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
   ret i32 %res
@@ -486,20 +459,17 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
   ret i32 %res
@@ -515,20 +485,17 @@ define i32 @ctz_v4i1(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
   ret i32 %res
@@ -544,20 +511,17 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
   ret i32 %res
@@ -573,20 +537,17 @@ define i32 @ctz_v2i1(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
   ret i32 %res
@@ -602,20 +563,17 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
   ret i32 %res

diff  --git a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
index 2cf8621ca066dd..62a660f36a86d7 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
@@ -4,7 +4,6 @@
 define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z1.b, b1
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -15,7 +14,6 @@ define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov w8, v1.s[1]
 ; CHECK-NEXT:    fmov w9, s1
 ; CHECK-NEXT:    ptrue p1.b
@@ -38,7 +36,6 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umov w8, v1.h[1]
 ; CHECK-NEXT:    umov w9, v1.h[0]
 ; CHECK-NEXT:    umov w10, v1.h[2]
@@ -67,7 +64,6 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -78,7 +74,6 @@ define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z1.q, q1
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -89,7 +84,6 @@ define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8
 define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v1.16b, v1.b[0]
 ; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -101,7 +95,6 @@ define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mas
 define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v3.16b, v1.b[4]
 ; CHECK-NEXT:    dup v1.16b, v1.b[0]
 ; CHECK-NEXT:    cmeq v3.16b, v0.16b, v3.16b
@@ -116,7 +109,6 @@ define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mas
 define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v3.16b, v1.b[2]
 ; CHECK-NEXT:    dup v4.16b, v1.b[0]
 ; CHECK-NEXT:    dup v5.16b, v1.b[4]
@@ -139,14 +131,11 @@ define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mas
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.16b, v2.16b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
   ret <16 x i1> %r
@@ -157,13 +146,10 @@ define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %m
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.16b, v2.16b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
   ret <16 x i1> %r
@@ -174,14 +160,11 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.8b, v2.8b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask)
   ret <8 x i1> %r
@@ -190,7 +173,6 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #
 define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv8i16_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z1.q, q1
 ; CHECK-NEXT:    match p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    ret
@@ -203,8 +185,6 @@ define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v2.8h, v2.8h, #15
 ; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
@@ -223,13 +203,10 @@ define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.8b, v2.8b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
   ret <8 x i1> %r
@@ -243,10 +220,8 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z3.b, z1.b[1]
 ; CHECK-NEXT:    mov z4.b, b1
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z5.b, z1.b[2]
 ; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z3.b
@@ -465,7 +440,6 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    mov z3.s, s1
 ; CHECK-NEXT:    ptrue p1.s
@@ -490,10 +464,9 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
 define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv2xi64_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, z1.d[1]
-; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z1.d, d1
+; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z2.d
 ; CHECK-NEXT:    cmpeq p1.d, p1/z, z0.d, z1.d
 ; CHECK-NEXT:    sel p1.b, p1, p1.b, p2.b

diff  --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
index 58591b11c184fb..b95f32f9095d95 100644
--- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll
+++ b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
@@ -87,7 +87,6 @@ define bfloat @stofp_i64_bf16(i64 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i64 %a to bfloat
@@ -116,7 +115,6 @@ define bfloat @utofp_i64_bf16(i64 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i64 %a to bfloat
@@ -135,7 +133,6 @@ define bfloat @stofp_i32_bf16(i32 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i32 %a to bfloat
@@ -154,7 +151,6 @@ define bfloat @utofp_i32_bf16(i32 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i32 %a to bfloat
@@ -173,7 +169,6 @@ define bfloat @stofp_i16_bf16(i16 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i16 %a to bfloat
@@ -192,7 +187,6 @@ define bfloat @utofp_i16_bf16(i16 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i16 %a to bfloat
@@ -211,7 +205,6 @@ define bfloat @stofp_i8_bf16(i8 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i8 %a to bfloat
@@ -230,7 +223,6 @@ define bfloat @utofp_i8_bf16(i8 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i8 %a to bfloat
@@ -288,7 +280,6 @@ define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) {
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x bfloat>
@@ -338,7 +329,6 @@ define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) {
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x bfloat>
@@ -348,9 +338,6 @@ entry:
 define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
 ; CHECK-LABEL: stofp_v3i64_v3bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -375,9 +362,6 @@ entry:
 define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
 ; CHECK-LABEL: utofp_v3i64_v3bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -852,9 +836,8 @@ entry:
 define <2 x bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) {
 ; CHECK-LABEL: stofp_v2i32_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    movi v1.4s, #1
 ; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.4s, #1
 ; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
 ; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -869,9 +852,8 @@ entry:
 define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) {
 ; CHECK-LABEL: utofp_v2i32_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    movi v1.4s, #1
 ; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.4s, #1
 ; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
 ; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -1494,7 +1476,6 @@ entry:
 define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-LABEL: stofp_v2i8_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
@@ -1515,7 +1496,6 @@ define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp <2 x i8> %a to <2 x bfloat>
@@ -1525,7 +1505,6 @@ entry:
 define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-LABEL: utofp_v2i8_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
@@ -1546,7 +1525,6 @@ define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp <2 x i8> %a to <2 x bfloat>

diff  --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 81c1a64f2d434f..a7059b6d8768aa 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -1081,11 +1081,8 @@ define <3 x fp128> @stofp_v3i64_v3f128(<3 x i64> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatditf
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1139,11 +1136,8 @@ define <3 x fp128> @utofp_v3i64_v2f128(<3 x i64> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunditf
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1207,12 +1201,10 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattidf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattidf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1232,11 +1224,9 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattidf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattidf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1265,12 +1255,10 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntidf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntidf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1290,11 +1278,9 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntidf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntidf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1483,29 +1469,18 @@ entry:
 define <3 x double> @stofp_v3i64_v3f64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i64_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i64> %a to <3 x double>
@@ -1515,29 +1490,18 @@ entry:
 define <3 x double> @utofp_v3i64_v3f64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i64_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i64> %a to <3 x double>
@@ -1775,7 +1739,6 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatsitf
@@ -1797,7 +1760,6 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatsitf
@@ -1822,7 +1784,6 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunsitf
@@ -1844,7 +1805,6 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatunsitf
@@ -2001,10 +1961,8 @@ define <3 x double> @stofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-SD-NEXT:    sshll2 v0.2d, v0.4s, #0
 ; CHECK-SD-NEXT:    scvtf v3.2d, v1.2d
 ; CHECK-SD-NEXT:    scvtf v2.2d, v0.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i32_v3f64:
@@ -2013,7 +1971,6 @@ define <3 x double> @stofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    scvtf v3.2d, v1.2d
 ; CHECK-GI-NEXT:    scvtf v2.2d, v0.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -2029,10 +1986,8 @@ define <3 x double> @utofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-SD-NEXT:    ucvtf v3.2d, v1.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v0.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i32_v3f64:
@@ -2041,7 +1996,6 @@ define <3 x double> @utofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    ucvtf v3.2d, v1.2d
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v0.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -2433,7 +2387,6 @@ define <2 x fp128> @stofp_v2i16_v2f128(<2 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sxth w0, w8
@@ -2460,7 +2413,6 @@ define <2 x fp128> @utofp_v2i16_v2f128(<2 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and w0, w8, #0xffff
@@ -2487,7 +2439,6 @@ define <3 x fp128> @stofp_v3i16_v3f128(<3 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[0]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __floatsitf
@@ -2516,7 +2467,6 @@ define <3 x fp128> @utofp_v3i16_v3f128(<3 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __floatunsitf
@@ -2588,10 +2538,7 @@ define <3 x double> @stofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    sshll2 v1.2d, v1.4s, #0
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    scvtf v2.2d, v1.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i16_v3f64:
@@ -2601,9 +2548,7 @@ define <3 x double> @stofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-GI-NEXT:    sshll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    scvtf v2.2d, v1.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i16> %a to <3 x double>
@@ -2618,10 +2563,7 @@ define <3 x double> @utofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    ushll2 v1.2d, v1.4s, #0
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v1.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i16_v3f64:
@@ -2631,9 +2573,7 @@ define <3 x double> @utofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v1.2d
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i16> %a to <3 x double>
@@ -3083,7 +3023,6 @@ define <2 x fp128> @stofp_v2i8_v2f128(<2 x i8> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sxtb w0, w8
@@ -3110,7 +3049,6 @@ define <2 x fp128> @utofp_v2i8_v2f128(<2 x i8> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and w0, w8, #0xff
@@ -3299,11 +3237,8 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
 ; CHECK-SD-NEXT:    scvtf v2.2d, v1.2d
 ; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i8_v3f64:
@@ -3320,12 +3255,10 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    smov x9, v0.h[3]
 ; CHECK-GI-NEXT:    mov v2.d[0], x8
-; CHECK-GI-NEXT:    scvtf v0.2d, v1.2d
 ; CHECK-GI-NEXT:    mov v2.d[1], x9
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    scvtf v0.2d, v1.2d
 ; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i8> %a to <3 x double>
@@ -3345,10 +3278,7 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v1.2d
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i8_v3f64:
@@ -3365,12 +3295,10 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    umov w9, v0.h[3]
 ; CHECK-GI-NEXT:    mov v2.d[0], x8
-; CHECK-GI-NEXT:    ucvtf v0.2d, v1.2d
 ; CHECK-GI-NEXT:    mov v2.d[1], x9
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ucvtf v0.2d, v1.2d
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i8> %a to <3 x double>
@@ -3442,7 +3370,6 @@ entry:
 define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: stofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[0]
 ; CHECK-SD-NEXT:    umov w9, v0.b[2]
 ; CHECK-SD-NEXT:    umov w11, v0.b[4]
@@ -3499,7 +3426,6 @@ entry:
 define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: utofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[0]
 ; CHECK-SD-NEXT:    umov w9, v0.b[2]
 ; CHECK-SD-NEXT:    umov w11, v0.b[4]
@@ -4151,15 +4077,12 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4177,11 +4100,9 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -4210,15 +4131,12 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4236,11 +4154,9 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -4274,18 +4190,15 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -4313,16 +4226,13 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x21
 ; CHECK-GI-NEXT:    mov x1, x22
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
@@ -4358,18 +4268,15 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -4397,16 +4304,13 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x21
 ; CHECK-GI-NEXT:    mov x1, x22
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
@@ -4445,9 +4349,6 @@ entry:
 define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
@@ -4457,9 +4358,6 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: stofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
@@ -4477,9 +4375,6 @@ entry:
 define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
@@ -4489,9 +4384,6 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: utofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
@@ -5641,7 +5533,6 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5661,15 +5552,12 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-FP16-NEXT:    ret
 ;
@@ -5695,7 +5583,6 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5713,11 +5600,9 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v0.h[0]
@@ -5754,7 +5639,6 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5774,15 +5658,12 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-FP16-NEXT:    ret
 ;
@@ -5808,7 +5689,6 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5826,11 +5706,9 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v0.h[0]
@@ -5880,7 +5758,6 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5905,18 +5782,15 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -5959,8 +5833,7 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    fmov d0, d1
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5983,23 +5856,19 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x21
 ; CHECK-GI-FP16-NEXT:    mov x1, x22
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    fmov d0, d1
 ; CHECK-GI-FP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -6045,7 +5914,6 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -6070,18 +5938,15 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -6124,8 +5989,7 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    fmov d0, d1
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -6148,23 +6012,19 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x21
 ; CHECK-GI-FP16-NEXT:    mov x1, x22
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-FP16-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    fmov d0, d1
 ; CHECK-GI-FP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -6182,7 +6042,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: stofp_v2i64_v2f16:
@@ -6192,7 +6051,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    scvtf h0, x9
 ; CHECK-SD-FP16-NEXT:    scvtf h1, x8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v2i64_v2f16:
@@ -6211,7 +6069,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h0, d0
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x half>
@@ -6228,7 +6085,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: utofp_v2i64_v2f16:
@@ -6238,7 +6094,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    ucvtf h0, x9
 ; CHECK-SD-FP16-NEXT:    ucvtf h1, x8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v2i64_v2f16:
@@ -6257,7 +6112,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h0, d0
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x half>
@@ -6267,9 +6121,6 @@ entry:
 define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
@@ -6280,9 +6131,6 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v3i64_v3f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
@@ -6293,9 +6141,6 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: stofp_v3i64_v3f16:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
@@ -6305,7 +6150,6 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <3 x i64> %a to <3 x half>
@@ -6315,9 +6159,6 @@ entry:
 define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
@@ -6328,9 +6169,6 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v3i64_v3f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
@@ -6341,9 +6179,6 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: utofp_v3i64_v3f16:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
@@ -6353,7 +6188,6 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <3 x i64> %a to <3 x half>
@@ -6392,7 +6226,6 @@ define <4 x half> @stofp_v4i64_v4f16(<4 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h2, d3
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <4 x i64> %a to <4 x half>
@@ -6431,7 +6264,6 @@ define <4 x half> @utofp_v4i64_v4f16(<4 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h2, d3
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <4 x i64> %a to <4 x half>
@@ -7173,7 +7005,6 @@ entry:
 define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: stofp_v2i32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
@@ -7193,7 +7024,6 @@ entry:
 define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: utofp_v2i32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
@@ -7899,7 +7729,6 @@ entry:
 define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-LABEL: stofp_v2i8_v2f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-SD-NOFP16-NEXT:    sxtb w9, w9
@@ -7909,12 +7738,10 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: stofp_v2i8_v2f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-FP16-NEXT:    fmov w9, s0
 ; CHECK-SD-FP16-NEXT:    sxtb w9, w9
@@ -7922,7 +7749,6 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-FP16-NEXT:    scvtf h0, w9
 ; CHECK-SD-FP16-NEXT:    scvtf h1, w8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v2i8_v2f16:
@@ -7950,7 +7776,6 @@ entry:
 define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-LABEL: utofp_v2i8_v2f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-SD-NOFP16-NEXT:    and w9, w9, #0xff
@@ -7960,12 +7785,10 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: utofp_v2i8_v2f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-FP16-NEXT:    fmov w9, s0
 ; CHECK-SD-FP16-NEXT:    and w9, w9, #0xff
@@ -7973,7 +7796,6 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-FP16-NEXT:    ucvtf h0, w9
 ; CHECK-SD-FP16-NEXT:    ucvtf h1, w8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v2i8_v2f16:

diff  --git a/llvm/test/CodeGen/AArch64/ldexp.ll b/llvm/test/CodeGen/AArch64/ldexp.ll
index 6019fa1490e3dc..0a3a85ab181866 100644
--- a/llvm/test/CodeGen/AArch64/ldexp.ll
+++ b/llvm/test/CodeGen/AArch64/ldexp.ll
@@ -6,13 +6,10 @@
 define double @testExp(double %val, i32 %a) {
 ; SVE-LABEL: testExp:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SVE-NEXT:    sxtw x8, w0
 ; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    fmov d1, x8
 ; SVE-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExp:
@@ -28,13 +25,10 @@ declare double @ldexp(double, i32) memory(none)
 define double @testExpIntrinsic(double %val, i32 %a) {
 ; SVE-LABEL: testExpIntrinsic:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SVE-NEXT:    sxtw x8, w0
 ; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    fmov d1, x8
 ; SVE-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExpIntrinsic:
@@ -50,9 +44,7 @@ define float @testExpf(float %val, i32 %a) {
 ; SVELINUX:       // %bb.0: // %entry
 ; SVELINUX-NEXT:    fmov s1, w0
 ; SVELINUX-NEXT:    ptrue p0.s
-; SVELINUX-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; SVELINUX-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
-; SVELINUX-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; SVELINUX-NEXT:    ret
 ;
 ; SVEWINDOWS-LABEL: testExpf:
@@ -72,9 +64,7 @@ define float @testExpfIntrinsic(float %val, i32 %a) {
 ; SVE:       // %bb.0: // %entry
 ; SVE-NEXT:    fmov s1, w0
 ; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; SVE-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExpfIntrinsic:

diff  --git a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
index 7e28c863b07a9a..2776bd68639f3c 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
@@ -10,14 +10,12 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
@@ -31,14 +29,12 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)

diff  --git a/llvm/test/CodeGen/AArch64/llrint-conv.ll b/llvm/test/CodeGen/AArch64/llrint-conv.ll
index 3a6396d120f791..6ca211917ab9e1 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv.ll
@@ -7,7 +7,6 @@ define i32 @testmsws(float %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x0, s0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.f32(float %x)
@@ -31,7 +30,6 @@ define i32 @testmswd(double %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx d0, d0
 ; CHECK-NEXT:    fcvtzs x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.f64(double %x)
@@ -57,7 +55,6 @@ define i32 @testmswl(fp128 %x) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl llrintl
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index 4bf65e7d6fd088..d6adcc3f0ebb9a 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -7,13 +7,11 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
@@ -26,13 +24,11 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)

diff  --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index c1ea891bc86e7e..93eed72178cb31 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -57,7 +57,6 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -88,7 +87,6 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -100,7 +98,6 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; GISEL-NEXT:    .cfi_def_cfa_offset 32
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    fcvt s0, h0
 ; GISEL-NEXT:    bl exp10f
@@ -114,7 +111,6 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v0.h[1], v1.h[0]
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    add sp, sp, #32
 ; GISEL-NEXT:    ret
   %r = call <2 x half> @llvm.exp10.v2f16(<2 x half> %x)
@@ -128,7 +124,6 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -159,7 +154,6 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -172,7 +166,6 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; GISEL-NEXT:    .cfi_offset w30, -16
 ; GISEL-NEXT:    .cfi_offset b8, -24
 ; GISEL-NEXT:    .cfi_offset b9, -32
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    mov h9, v0.h[2]
 ; GISEL-NEXT:    fcvt s0, h0
@@ -193,8 +186,7 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; GISEL-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.h[1], v2.h[0]
 ; GISEL-NEXT:    mov v1.h[2], v0.h[0]
-; GISEL-NEXT:    mov v0.16b, v1.16b
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; GISEL-NEXT:    fmov d0, d1
 ; GISEL-NEXT:    add sp, sp, #64
 ; GISEL-NEXT:    ret
   %r = call <3 x half> @llvm.exp10.v3f16(<3 x half> %x)
@@ -208,7 +200,6 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -239,7 +230,6 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -254,7 +244,6 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; GISEL-NEXT:    .cfi_offset b8, -16
 ; GISEL-NEXT:    .cfi_offset b9, -24
 ; GISEL-NEXT:    .cfi_offset b10, -32
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    mov h9, v0.h[2]
 ; GISEL-NEXT:    mov h10, v0.h[3]
@@ -284,8 +273,7 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; GISEL-NEXT:    mov v1.h[1], v3.h[0]
 ; GISEL-NEXT:    mov v1.h[2], v2.h[0]
 ; GISEL-NEXT:    mov v1.h[3], v0.h[0]
-; GISEL-NEXT:    mov v0.16b, v1.16b
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; GISEL-NEXT:    fmov d0, d1
 ; GISEL-NEXT:    add sp, sp, #80
 ; GISEL-NEXT:    ret
   %r = call <4 x half> @llvm.exp10.v4f16(<4 x half> %x)
@@ -301,28 +289,14 @@ define float @exp10_f32(float %x) {
 }
 
 define <1 x float> @exp10_v1f32(<1 x float> %x) {
-; SDAG-LABEL: exp10_v1f32:
-; SDAG:       // %bb.0:
-; SDAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; SDAG-NEXT:    .cfi_def_cfa_offset 16
-; SDAG-NEXT:    .cfi_offset w30, -16
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
-; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $d0
-; SDAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; SDAG-NEXT:    ret
-;
-; GISEL-LABEL: exp10_v1f32:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISEL-NEXT:    .cfi_def_cfa_offset 16
-; GISEL-NEXT:    .cfi_offset w30, -16
-; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $d0
-; GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISEL-NEXT:    ret
+; CHECK-LABEL: exp10_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp10f
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %r = call <1 x float> @llvm.exp10.v1f32(<1 x float> %x)
   ret <1 x float> %r
 }
@@ -334,20 +308,15 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -359,16 +328,12 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; GISEL-NEXT:    .cfi_def_cfa_offset 32
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov s8, v0.s[1]
-; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.s[1], v0.s[0]
@@ -389,20 +354,16 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[2]
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v1.s[2], v0.s[0]
 ; SDAG-NEXT:    mov v0.16b, v1.16b
@@ -420,18 +381,14 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; GISEL-NEXT:    .cfi_offset b9, -32
 ; GISEL-NEXT:    mov s8, v0.s[1]
 ; GISEL-NEXT:    mov s9, v0.s[2]
-; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; GISEL-NEXT:    mov v1.s[1], v2.s[0]
@@ -453,27 +410,22 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[2]
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v1.s[2], v0.s[0]
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[3]
 ; SDAG-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v1.s[3], v0.s[0]
 ; SDAG-NEXT:    mov v0.16b, v1.16b
@@ -494,22 +446,17 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; GISEL-NEXT:    mov s8, v0.s[1]
 ; GISEL-NEXT:    mov s9, v0.s[2]
 ; GISEL-NEXT:    mov s10, v0.s[3]
-; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s10
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
-; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -555,13 +502,10 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.d[1], v1.d[0]
 ; SDAG-NEXT:    add sp, sp, #48
@@ -576,14 +520,11 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
 ; GISEL-NEXT:    mov d8, v0.d[1]
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
 ; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.d[1], v0.d[0]
@@ -663,22 +604,17 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; SDAG-NEXT:    bl exp10
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov v0.d[1], v1.d[0]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
-; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    fmov d1, d0
 ; SDAG-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -699,28 +635,23 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; GISEL-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    mov d8, v0.d[1]
 ; GISEL-NEXT:    mov d9, v1.d[1]
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d9
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; GISEL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; GISEL-NEXT:    mov v2.d[1], v1.d[0]
-; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; GISEL-NEXT:    fmov d2, d1
+; GISEL-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
 ; GISEL-NEXT:    mov v1.d[1], v0.d[0]
+; GISEL-NEXT:    mov v2.d[1], v3.d[0]
 ; GISEL-NEXT:    mov v0.16b, v2.16b
 ; GISEL-NEXT:    add sp, sp, #80
 ; GISEL-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
index e4cb8ed6eaf90f..98c50d772f9dee 100644
--- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -9,7 +9,6 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    add x0, sp, #36
@@ -46,9 +45,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
   %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -70,14 +67,11 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
 ; CHECK-NEXT:    add x0, sp, #56
 ; CHECK-NEXT:    add x19, sp, #56
 ; CHECK-NEXT:    bl frexpf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    add x0, sp, #44
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    add x0, sp, #44
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    add x0, sp, #60
 ; CHECK-NEXT:    add x20, sp, #60
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
@@ -87,7 +81,6 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    ldr s1, [sp, #44]
 ; CHECK-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
 ; CHECK-NEXT:    mov v2.s[2], v0.s[0]

diff  --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index c5efc796e7a3c4..8e70d79fa61eff 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -109,7 +109,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    add x0, sp, #36
@@ -152,8 +151,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; CHECK-NEXT:    mov v1.h[2], v4.h[0]
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
 ; CHECK-NEXT:    mov v1.h[3], v3.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -169,7 +166,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
 ; NO-LIBCALL-NEXT:    .cfi_offset b10, -40
 ; NO-LIBCALL-NEXT:    .cfi_offset b11, -48
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fcvt s8, h1
@@ -224,13 +220,10 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; NO-LIBCALL-NEXT:    fmov s1, s0
 ; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    fcvt h2, s1
-; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; NO-LIBCALL-NEXT:    mov v1.h[3], v2.h[0]
-; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; NO-LIBCALL-NEXT:    add sp, sp, #80
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
@@ -288,7 +281,6 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; CHECK-NEXT:    add x0, sp, #20
 ; CHECK-NEXT:    add x1, sp, #16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #28
@@ -328,13 +320,10 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -342,18 +331,14 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    bl cosf
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s9
@@ -379,11 +364,9 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    add x0, sp, #44
 ; CHECK-NEXT:    add x1, sp, #40
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #28
@@ -397,8 +380,6 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -410,34 +391,26 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
 ; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
 ; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl cosf
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    fmov s1, s0
 ; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[1], v2.s[0]
-; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; NO-LIBCALL-NEXT:    add sp, sp, #64
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
@@ -495,7 +468,6 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
 ; CHECK-NEXT:    add x0, sp, #56
 ; CHECK-NEXT:    add x1, sp, #40
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #32
@@ -525,21 +497,16 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov d0, d8
 ; NO-LIBCALL-NEXT:    bl sin
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sin
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov d0, d8
 ; NO-LIBCALL-NEXT:    bl cos
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cos
 ; NO-LIBCALL-NEXT:    fmov d1, d0
 ; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index 3fa5d64a210e19..2c9811dbcf992f 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -233,7 +233,6 @@ define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) {
 ; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-SD-NEXT:    add x8, x0, #1
 ; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: load_v2i8:
@@ -241,7 +240,6 @@ define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) {
 ; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT:    ldr b1, [x0, #1]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <2 x i8>, ptr %ptr
   ret <2 x i8> %a
@@ -272,7 +270,6 @@ define <2 x i16> @load_v2i16(ptr %ptr) {
 ; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-SD-NEXT:    add x8, x0, #2
 ; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: load_v2i16:
@@ -280,7 +277,6 @@ define <2 x i16> @load_v2i16(ptr %ptr) {
 ; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-GI-NEXT:    ldr h1, [x0, #2]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <2 x i16>, ptr %ptr
   ret <2 x i16> %a
@@ -366,7 +362,6 @@ define <7 x i8> @load_v7i8(ptr %ptr) {
 ; CHECK-GI-NEXT:    mov v0.b[5], v1.b[0]
 ; CHECK-GI-NEXT:    ldr b1, [x0, #6]
 ; CHECK-GI-NEXT:    mov v0.b[6], v1.b[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <7 x i8>, ptr %ptr
   ret <7 x i8> %a
@@ -385,7 +380,6 @@ define <3 x i16> @load_v3i16(ptr %ptr) {
 ; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <3 x i16>, ptr %ptr
   ret <3 x i16> %a

diff  --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index 31047954401cf5..0a62f8076a1be0 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -5,7 +5,6 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -65,7 +64,6 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: or_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -139,7 +137,6 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -233,7 +230,6 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -293,7 +289,6 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: xor_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -367,7 +362,6 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -461,7 +455,6 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -521,7 +514,6 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: and_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -595,7 +587,6 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
index 6771a862c386fd..69bb6c3d0a0978 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
@@ -10,14 +10,12 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
@@ -31,14 +29,12 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)

diff  --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll
index 60393b4f18d9d0..e37556032732de 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll
@@ -7,7 +7,6 @@ define i32 @testmsws(float %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x0, s0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
@@ -31,7 +30,6 @@ define i32 @testmswd(double %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx d0, d0
 ; CHECK-NEXT:    fcvtzs x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
@@ -57,7 +55,6 @@ define dso_local i32 @testmswl(fp128 %x) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl lrintl
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index bf78fd456eac0e..980d420e90c48d 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -7,13 +7,11 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
@@ -26,13 +24,11 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
-; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)

diff  --git a/llvm/test/CodeGen/AArch64/lslfast.ll b/llvm/test/CodeGen/AArch64/lslfast.ll
index 5ec70b5f229754..391ecda2c8c7de 100644
--- a/llvm/test/CodeGen/AArch64/lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/lslfast.ll
@@ -60,7 +60,6 @@ entry:
 define i64 @testsext3(i32 noundef %x, i64 noundef %y, i64 noundef %z) {
 ; CHECK-LABEL: testsext3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #3, #32
 ; CHECK-NEXT:    add x9, x8, x1
 ; CHECK-NEXT:    add x8, x8, x2
@@ -78,7 +77,6 @@ entry:
 define i64 @testzext3(i32 noundef %x, i64 noundef %y, i64 noundef %z) {
 ; CHECK-LABEL: testzext3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x8, x0, #3, #32
 ; CHECK-NEXT:    add x9, x8, x1
 ; CHECK-NEXT:    add x8, x8, x2

diff  --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
index 4c8e589391c3ac..57f05a6832a4f5 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -4,7 +4,6 @@
 define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef %B, ptr noalias nocapture noundef %C, i32 noundef %n) {
 ; CHECK-LABEL: fma_dup_f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    cbz w2, .LBB0_8
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    cmp w2, #15

diff  --git a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
index f6bbdf5d95d870..7452a7b96d327f 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
@@ -4,7 +4,6 @@
 define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
 ; CHECK-LABEL: foo:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp w0, #1
 ; CHECK-NEXT:    b.lt .LBB0_10
 ; CHECK-NEXT:  // %bb.1: // %for.cond1.preheader.us.preheader

diff  --git a/llvm/test/CodeGen/AArch64/memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-inline.ll
index 02d852b5ce45ad..d2ca291ba0c0ca 100644
--- a/llvm/test/CodeGen/AArch64/memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/memset-inline.ll
@@ -29,7 +29,7 @@ define void @memset_2(ptr %a, i8 %value) nounwind {
 define void @memset_4(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_4:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    mov w8, #16843009
+; ALL-NEXT:    mov w8, #16843009 // =0x1010101
 ; ALL-NEXT:    and w9, w1, #0xff
 ; ALL-NEXT:    mul w8, w9, w8
 ; ALL-NEXT:    str w8, [x0]
@@ -41,8 +41,7 @@ define void @memset_4(ptr %a, i8 %value) nounwind {
 define void @memset_8(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_8:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
-; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    str x8, [x0]
@@ -54,8 +53,7 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
 define void @memset_16(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_16:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
-; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    stp x8, x8, [x0]
@@ -67,8 +65,7 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
 define void @memset_32(ptr %a, i8 %value) nounwind {
 ; GPR-LABEL: memset_32:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
-; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #16]
@@ -87,8 +84,7 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
 define void @memset_64(ptr %a, i8 %value) nounwind {
 ; GPR-LABEL: memset_64:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
-; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #48]
@@ -112,8 +108,7 @@ define void @memset_64(ptr %a, i8 %value) nounwind {
 define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
 ; ALL-LABEL: aligned_memset_16:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
-; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    stp x8, x8, [x0]
@@ -125,8 +120,7 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
 define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 ; GPR-LABEL: aligned_memset_32:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
-; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #16]
@@ -145,8 +139,7 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
 ; GPR-LABEL: aligned_memset_64:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
-; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #48]

diff  --git a/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
index 97cfb13bcd5eb3..cda064a0fc53d0 100644
--- a/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
@@ -7,8 +7,7 @@ declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 define void @test1(ptr %a, i8 %value) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    mov x8, #72340172838076673
+; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NEXT:    and x9, x1, #0xff
 ; CHECK-NEXT:    mul x8, x9, x8
 ; CHECK-NEXT:    str x8, [x0]
@@ -20,7 +19,7 @@ define void @test1(ptr %a, i8 %value) nounwind {
 define void @regular_memset_calls_external_function(ptr %a, i8 %value) nounwind {
 ; CHECK-LABEL: regular_memset_calls_external_function:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w2, #1024
+; CHECK-NEXT:    mov w2, #1024 // =0x400
 ; CHECK-NEXT:    b memset
   tail call void @llvm.memset.p0.i64(ptr %a, i8 %value, i64 1024, i1 0)
   ret void
@@ -32,6 +31,37 @@ define void @inlined_set_doesnt_call_external_function(ptr %a, i8 %value) nounwi
 ; CHECK-NEXT:    dup v0.16b, w1
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
+; CHECK-NEXT:    stp q0, q0, [x0, #64]
+; CHECK-NEXT:    stp q0, q0, [x0, #96]
+; CHECK-NEXT:    stp q0, q0, [x0, #128]
+; CHECK-NEXT:    stp q0, q0, [x0, #160]
+; CHECK-NEXT:    stp q0, q0, [x0, #192]
+; CHECK-NEXT:    stp q0, q0, [x0, #224]
+; CHECK-NEXT:    stp q0, q0, [x0, #256]
+; CHECK-NEXT:    stp q0, q0, [x0, #288]
+; CHECK-NEXT:    stp q0, q0, [x0, #320]
+; CHECK-NEXT:    stp q0, q0, [x0, #352]
+; CHECK-NEXT:    stp q0, q0, [x0, #384]
+; CHECK-NEXT:    stp q0, q0, [x0, #416]
+; CHECK-NEXT:    stp q0, q0, [x0, #448]
+; CHECK-NEXT:    stp q0, q0, [x0, #480]
+; CHECK-NEXT:    stp q0, q0, [x0, #512]
+; CHECK-NEXT:    stp q0, q0, [x0, #544]
+; CHECK-NEXT:    stp q0, q0, [x0, #576]
+; CHECK-NEXT:    stp q0, q0, [x0, #608]
+; CHECK-NEXT:    stp q0, q0, [x0, #640]
+; CHECK-NEXT:    stp q0, q0, [x0, #672]
+; CHECK-NEXT:    stp q0, q0, [x0, #704]
+; CHECK-NEXT:    stp q0, q0, [x0, #736]
+; CHECK-NEXT:    stp q0, q0, [x0, #768]
+; CHECK-NEXT:    stp q0, q0, [x0, #800]
+; CHECK-NEXT:    stp q0, q0, [x0, #832]
+; CHECK-NEXT:    stp q0, q0, [x0, #864]
+; CHECK-NEXT:    stp q0, q0, [x0, #896]
+; CHECK-NEXT:    stp q0, q0, [x0, #928]
+; CHECK-NEXT:    stp q0, q0, [x0, #960]
+; CHECK-NEXT:    stp q0, q0, [x0, #992]
+; CHECK-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1024, i1 0)
   ret void
 }

diff  --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
index ea40f9e52dcd68..823dd815d556c4 100644
--- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
+++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
@@ -1595,10 +1595,10 @@ body:             |
 # CHECK-NEXT: D0_HI [0B,48r:0)[192r,224r:1) 0 at 0B-phi 1 at 192r
 # CHECK-NEXT: D1_HI [0B,88r:0)[208r,224r:1) 0 at 0B-phi 1 at 208r
 # CHECK-NEXT: D2_HI [0B,96r:0) 0 at 0B-phi
-# CHECK-NEXT: %0 [48r,168r:0) 0 at 48r  weight:0.000000e+00
+# CHECK-NEXT: %0 [48r,168r:0) 0 at 48r  L000000000000002B [48r,168r:0) 0 at 48r  L0000000000000004 [48r,104r:0) 0 at 48r  weight:0.000000e+00
 # CHECK-NEXT: %1 [88r,120r:0) 0 at 88r  weight:0.000000e+00
 # CHECK-NEXT: %2 [96r,128r:0) 0 at 96r  weight:0.000000e+00
-# CHECK-NEXT: %3 [104r,176r:0) 0 at 104r  weight:0.000000e+00
+# CHECK-NEXT: %3 [104r,176r:0) 0 at 104r  L000000000000002B [104r,176r:0) 0 at 104r  L0000000000000004 [104r,104d:0) 0 at 104r  weight:0.000000e+00
 # CHECK-NEXT: %6 [80r,128r:0) 0 at 80r  weight:0.000000e+00
 # CHECK-NEXT: %7 [128r,160r:0) 0 at 128r  weight:0.000000e+00
 # CHECK-NEXT: %8 [120r,136r:0) 0 at 120r  weight:0.000000e+00

diff  --git a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
index 17abaf387c1804..5dbf522f38ba4e 100644
--- a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
+++ b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
@@ -102,7 +102,6 @@ define <4 x i16> @test_mls0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:    umlsl v0.8h, v2.8b, v3.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
@@ -118,7 +117,6 @@ define <4 x i16> @test_mls1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:    smlsl v0.8h, v2.8b, v3.8b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
@@ -134,7 +132,6 @@ define <2 x i32> @test_mls2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT:    umlsl v0.4s, v2.4h, v3.4h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
@@ -150,7 +147,6 @@ define <2 x i32> @test_mls3(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT:    smlsl v0.4s, v2.4h, v3.4h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
@@ -166,7 +162,6 @@ define <1 x i64> @test_mls4(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT:    umlsl v0.2d, v2.2s, v3.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
@@ -182,7 +177,6 @@ define <1 x i64> @test_mls5(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT:    smlsl v0.2d, v2.2s, v3.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 9ca975d9e742e1..206561079d4beb 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -423,14 +423,8 @@ entry:
 define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    fmov x10, d1
 ; CHECK-SD-NEXT:    fmov x11, d2
 ; CHECK-SD-NEXT:    mul x8, x9, x8
@@ -445,10 +439,6 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    fmov x8, d0
@@ -463,7 +453,6 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mul x8, x8, x9
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index d06612e2332e6e..ef66d77cf4862d 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -519,7 +519,6 @@ define <2 x i16> @bitcast_i32_to_v2i16(i32 %word) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    fmov s0, w0
 ; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: bitcast_i32_to_v2i16:
@@ -560,7 +559,6 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
 ; CHECK-LE-NEXT:    umov w9, v0.b[1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: bitcast_i16_to_v2i8:

diff  --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index f6dbf5251fc272..0f482e2caecc1c 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1238,23 +1238,18 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    adrp x8, .LCPI89_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    adrp x8, .LCPI89_0
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI89_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI89_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI89_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI89_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %c
@@ -1270,12 +1265,10 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8_zero:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI90_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI90_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %c
@@ -1293,9 +1286,8 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
 define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    adrp x8, .LCPI91_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-SD-NEXT:    adrp x8, .LCPI91_0
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI91_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
@@ -1303,12 +1295,10 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI91_0
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI91_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b }, v0.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> zeroinitializer, <8 x i8> %a, <8 x i32> <i32 8, i32 0, i32 10, i32 1, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %c
@@ -1332,23 +1322,12 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-SD-NEXT: .byte   14
 ; CHECK-SD-NEXT: .byte   15
 define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    adrp x8, .LCPI92_0
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI92_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: vselect_equivalent_shuffle_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI92_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
   %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %c
 }
@@ -1372,9 +1351,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI93_0
 ; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    adrp x8, .LCPI93_0
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI93_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
@@ -1408,9 +1386,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q31_q0
-; CHECK-GI-NEXT:    adrp x8, .LCPI94_0
 ; CHECK-GI-NEXT:    movi v31.2d, #0000000000000000
+; CHECK-GI-NEXT:    adrp x8, .LCPI94_0
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI94_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v31.16b, v0.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
@@ -1421,22 +1398,16 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) {
 define <4 x i16> @vselect_equivalent_shuffle_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v4i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NEXT:    mov v0.h[2], v1.h[1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI95_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI95_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI95_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
   ret <4 x i16> %c
@@ -1452,9 +1423,7 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI96_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI96_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 3>

diff  --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
index cf09a46000dab9..baab8b7111acce 100644
--- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
@@ -103,7 +103,6 @@ entry:
 define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -117,7 +116,6 @@ entry:
 define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_u32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -158,7 +156,6 @@ entry:
 define <2 x i32> @test_vdot_lane_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_u32_zero:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -173,7 +170,6 @@ entry:
 define <4 x i32> @test_vdotq_lane_u32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_u32_zero:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -217,7 +213,6 @@ entry:
 define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -231,7 +226,6 @@ entry:
 define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -272,7 +266,6 @@ entry:
 define <2 x i32> @test_vdot_lane_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_s32_zero:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -287,7 +280,6 @@ entry:
 define <4 x i32> @test_vdotq_lane_s32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_s32_zero:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
index 93a50ec305e1e5..bcf333ce9e0719 100644
--- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
@@ -6,7 +6,6 @@ define <4 x i16> @addls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v8i8_0:
@@ -27,9 +26,7 @@ entry:
 define <4 x i16> @addws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addws_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v8i8_0:
@@ -48,7 +45,6 @@ define <4 x i16> @addlu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addlu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v8i8_0:
@@ -69,9 +65,7 @@ entry:
 define <4 x i16> @addwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addwu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v8i8_0:
@@ -90,7 +84,6 @@ define <4 x i16> @subls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v8i8_0:
@@ -111,9 +104,7 @@ entry:
 define <4 x i16> @subws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subws_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v8i8_0:
@@ -132,7 +123,6 @@ define <4 x i16> @sublu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: sublu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v8i8_0:
@@ -153,9 +143,7 @@ entry:
 define <4 x i16> @subwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subwu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v8i8_0:
@@ -174,7 +162,6 @@ define <4 x i16> @mulls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: mulls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v8i8_0:
@@ -209,7 +196,6 @@ define <4 x i16> @mullu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: mullu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v8i8_0:
@@ -244,7 +230,6 @@ define <2 x i32> @addls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v4i16_0:
@@ -265,9 +250,7 @@ entry:
 define <2 x i32> @addws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addws_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v4i16_0:
@@ -286,7 +269,6 @@ define <2 x i32> @addlu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addlu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v4i16_0:
@@ -307,9 +289,7 @@ entry:
 define <2 x i32> @addwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addwu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v4i16_0:
@@ -328,7 +308,6 @@ define <2 x i32> @subls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v4i16_0:
@@ -349,9 +328,7 @@ entry:
 define <2 x i32> @subws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subws_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v4i16_0:
@@ -370,7 +347,6 @@ define <2 x i32> @sublu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: sublu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v4i16_0:
@@ -391,9 +367,7 @@ entry:
 define <2 x i32> @subwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subwu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v4i16_0:
@@ -412,7 +386,6 @@ define <2 x i32> @mulls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: mulls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v4i16_0:
@@ -447,7 +420,6 @@ define <2 x i32> @mullu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: mullu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v4i16_0:
@@ -482,7 +454,6 @@ define <1 x i64> @addls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v2i32_0:
@@ -506,9 +477,7 @@ entry:
 define <1 x i64> @addws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addws_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v2i32_0:
@@ -530,7 +499,6 @@ define <1 x i64> @addlu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addlu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v2i32_0:
@@ -554,9 +522,7 @@ entry:
 define <1 x i64> @addwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addwu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v2i32_0:
@@ -578,7 +544,6 @@ define <1 x i64> @subls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v2i32_0:
@@ -602,9 +567,7 @@ entry:
 define <1 x i64> @subws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subws_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v2i32_0:
@@ -626,7 +589,6 @@ define <1 x i64> @sublu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: sublu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v2i32_0:
@@ -650,9 +612,7 @@ entry:
 define <1 x i64> @subwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subwu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v2i32_0:
@@ -674,7 +634,6 @@ define <1 x i64> @mulls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: mulls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v2i32_0:
@@ -696,24 +655,14 @@ entry:
 }
 
 define <1 x i64> @mulws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
-; CHECK-SD-LABEL: mulws_v2i32_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x8, d0
-; CHECK-SD-NEXT:    fmov x9, d1
-; CHECK-SD-NEXT:    mul x8, x8, x9
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: mulws_v2i32_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: mulws_v2i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
 entry:
   %s1s = sext <2 x i32> %s1 to <2 x i64>
   %t1 = shufflevector <2 x i64> %s1s, <2 x i64> poison, <1 x i32> <i32 0>
@@ -725,7 +674,6 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: mullu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v2i32_0:
@@ -747,24 +695,14 @@ entry:
 }
 
 define <1 x i64> @mulwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
-; CHECK-SD-LABEL: mulwu_v2i32_0:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x8, d0
-; CHECK-SD-NEXT:    fmov x9, d1
-; CHECK-SD-NEXT:    mul x8, x8, x9
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: mulwu_v2i32_0:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    mul x8, x8, x9
-; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: mulwu_v2i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
 entry:
   %s1s = zext <2 x i32> %s1 to <2 x i64>
   %t1 = shufflevector <2 x i64> %s1s, <2 x i64> poison, <1 x i32> <i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/neon-extract.ll b/llvm/test/CodeGen/AArch64/neon-extract.ll
index 26c4ec4d4bdb25..a071df1fd49d6c 100644
--- a/llvm/test/CodeGen/AArch64/neon-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extract.ll
@@ -261,7 +261,6 @@ entry:
 define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
 ; CHECK-LABEL: test_undef_vext_s16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
index 3f590226c47150..5714a142b39f2c 100644
--- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
@@ -50,7 +50,6 @@ define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.b[6], w9
 ; CHECK-NEXT:    mov v0.b[7], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %a0 = extractelement <4 x i32> %a, i32 0
@@ -83,10 +82,6 @@ entry:
 define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: extract_4_v4i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -206,8 +201,6 @@ define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 ; CHECK-LABEL: extract_4_mixed:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    xtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -267,12 +260,8 @@ entry:
 define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; CHECK-LABEL: extract_4_v4i32_badindex:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
index 0f4eec4fdfda1b..4c589abff1cfcd 100644
--- a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
@@ -9,9 +9,7 @@
 define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: test_lane0_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[0], v1.b[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 16 x i8> %b, i32 0
     %d = insertelement <8 x i8> %a, i8 %c, i32 0
@@ -21,9 +19,7 @@ define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 define <8 x i8> @test_lane15_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: test_lane15_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[7], v1.b[15]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 16 x i8> %b, i32 15
     %d = insertelement <8 x i8> %a, i8 %c, i32 7
@@ -68,9 +64,7 @@ define <16 x i8> @test_q_lane16_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
 define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: test_lane0_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x half> %b, i32 0
     %d = insertelement <4 x half> %a, half %c, i32 0
@@ -80,9 +74,7 @@ define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 define <4 x half> @test_lane7_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: test_lane7_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x half> %b, i32 7
     %d = insertelement <4 x half> %a, half %c, i32 3
@@ -126,9 +118,7 @@ define <8 x half> @test_q_lane8_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
 define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
 ; CHECK-LABEL: test_lane0_nxv8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x bfloat> %b, i32 0
     %d = insertelement <4 x bfloat> %a, bfloat %c, i32 0
@@ -138,9 +128,7 @@ define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat>
 define <4 x bfloat> @test_lane7_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
 ; CHECK-LABEL: test_lane7_nxv8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x bfloat> %b, i32 7
     %d = insertelement <4 x bfloat> %a, bfloat %c, i32 3
@@ -184,9 +172,7 @@ define <8 x bfloat> @test_q_lane8_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat
 define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: test_lane0_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x i16> %b, i32 0
     %d = insertelement <4 x i16> %a, i16 %c, i32 0
@@ -196,9 +182,7 @@ define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 define <4 x i16> @test_lane7_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: test_lane7_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x i16> %b, i32 7
     %d = insertelement <4 x i16> %a, i16 %c, i32 3
@@ -243,9 +227,7 @@ define <8 x i16> @test_q_lane8_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
 define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: test_lane0_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x float> %b, i32 0
     %d = insertelement <2 x float> %a, float %c, i32 0
@@ -255,9 +237,7 @@ define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b)
 define <2 x float> @test_lane3_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: test_lane3_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[3]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x float> %b, i32 3
     %d = insertelement <2 x float> %a, float %c, i32 1
@@ -301,9 +281,7 @@ define <4 x float> @test_q_lane4_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b
 define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_lane0_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x i32> %b, i32 0
     %d = insertelement <2 x i32> %a, i32 %c, i32 0
@@ -313,9 +291,7 @@ define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 define <2 x i32> @test_lane3_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_lane3_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[3]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x i32> %b, i32 3
     %d = insertelement <2 x i32> %a, i32 %c, i32 1
@@ -361,7 +337,6 @@ define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, <vscale x 2 x double> %
 ; CHECK-LABEL: test_lane0_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x double> %b, i32 0
     %d = insertelement <1 x double> %a, double %c, i32 0
@@ -372,7 +347,6 @@ define <1 x double> @test_lane1_nxv2f64(<1 x double> %a, <vscale x 2 x double> %
 ; CHECK-LABEL: test_lane1_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x double> %b, i32 1
     %d = insertelement <1 x double> %a, double %c, i32 0
@@ -417,7 +391,6 @@ define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: test_lane0_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x i64> %b, i32 0
     %d = insertelement <1 x i64> %a, i64 %c, i32 0
@@ -428,7 +401,6 @@ define <1 x i64> @test_lane1_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: test_lane1_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x i64> %b, i32 1
     %d = insertelement <1 x i64> %a, i64 %c, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
index 28961376d2b6d6..5b87142294839f 100644
--- a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
@@ -4,7 +4,6 @@
 define <4 x i32> @test_vins_v4i32(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -16,7 +15,6 @@ entry:
 define <4 x i32> @test_vins_v4i32_0(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -28,10 +26,7 @@ entry:
 define <2 x i32> @test_vins_v2i32(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -42,10 +37,7 @@ entry:
 define <2 x i32> @test_vins_v2i32_0(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -56,7 +48,6 @@ entry:
 define <2 x i64> @test_vins_v2i64(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -68,7 +59,6 @@ entry:
 define <2 x i64> @test_vins_v2i64_0(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -93,7 +83,6 @@ define float @test_vext_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vext_v4i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.s[0], v0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 3
@@ -104,7 +93,6 @@ entry:
 define float @test_vext_v4i32_0(<4 x i32> %a) {
 ; CHECK-LABEL: test_vext_v4i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 0
@@ -115,9 +103,7 @@ entry:
 define float @test_vext_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: test_vext_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v0.s[1]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i32> %a, i32 1
@@ -128,8 +114,6 @@ entry:
 define float @test_vext_v2i32_0(<2 x i32> %a) {
 ; CHECK-LABEL: test_vext_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i32> %a, i32 0
@@ -141,7 +125,6 @@ define double @test_vext_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vext_v2i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.d[0], v0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 1
@@ -152,7 +135,6 @@ entry:
 define double @test_vext_v2i64_0(<2 x i64> %a) {
 ; CHECK-LABEL: test_vext_v2i64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 0
@@ -163,8 +145,6 @@ entry:
 define double @test_vext_v1i64(<1 x i64> %a) {
 ; CHECK-LABEL: test_vext_v1i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <1 x i64> %a, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/neon-luti.ll b/llvm/test/CodeGen/AArch64/neon-luti.ll
index 54366627537626..6380bfd370a470 100644
--- a/llvm/test/CodeGen/AArch64/neon-luti.ll
+++ b/llvm/test/CodeGen/AArch64/neon-luti.ll
@@ -4,8 +4,6 @@
 define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -15,7 +13,6 @@ define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){
 define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> %vn, <16 x i8> %vm, i32 0)
@@ -25,7 +22,6 @@ define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm){
 define <16 x i8> @test_luti2q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -44,8 +40,6 @@ define <16 x i8> @test_luti2q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
@@ -55,7 +49,6 @@ define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> %vn, <16 x i8> %vm, i32 0)
@@ -65,8 +58,6 @@ define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti2q_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
@@ -85,8 +76,6 @@ define <8 x i16> @test_luti2q_laneq_i16(<8 x i16> %vn, <16 x i8> %vm){
 define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> %vn, <8 x i8> %vm, i32 0)
@@ -96,7 +85,6 @@ define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm){
 define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4i16(<4 x half> %vn, <16 x i8> %vm, i32 0)
@@ -106,7 +94,6 @@ define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){
 define <8 x half> @test_luti2q_lane_f16(<8 x half> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> %vn, <8 x i8> %vm, i32 0)
@@ -125,8 +112,6 @@ define <8 x half> @test_luti2q_laneq_f16(<8 x half> %vn, <16 x i8> %vm){
 define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
@@ -136,7 +121,6 @@ define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> %vn, <16 x i8> %vm, i32 0)
@@ -146,8 +130,6 @@ define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm){
 define <8 x bfloat> @test_luti2q_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
@@ -166,7 +148,6 @@ define <8 x bfloat> @test_luti2q_laneq_bf16(<8 x bfloat> %vn, <16 x i8> %vm){
 define <16 x i8> @test_luti4q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti4 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -185,9 +166,6 @@ define <16 x i8> @test_luti4q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm, i32 1)
@@ -197,8 +175,6 @@ define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i
 define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm, i32 1)
@@ -208,9 +184,6 @@ define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x
 define <8 x half> @test_luti4q_lane_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm, i32 1)
@@ -221,8 +194,6 @@ define <8 x half> @test_luti4q_lane_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <8 x
 define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm, i32 1)
@@ -232,8 +203,6 @@ define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <16
 define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm, i32 1)
@@ -243,9 +212,6 @@ define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %v
 define <8 x bfloat> @test_luti4q_lane_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm, i32 1)

diff  --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index c1b9a4c9dbb797..f2d6f1dc042d1b 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -35,7 +35,6 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
 ; CHECK-NODOT-LABEL: udot_narrow:
 ; CHECK-NODOT:       // %bb.0:
 ; CHECK-NODOT-NEXT:    umull v1.8h, v2.8b, v1.8b
-; CHECK-NODOT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NODOT-NEXT:    ushll v2.4s, v1.4h, #0
 ; CHECK-NODOT-NEXT:    ushll2 v3.4s, v1.8h, #0
 ; CHECK-NODOT-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -85,7 +84,6 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
 ; CHECK-NODOT-LABEL: sdot_narrow:
 ; CHECK-NODOT:       // %bb.0:
 ; CHECK-NODOT-NEXT:    smull v1.8h, v2.8b, v1.8b
-; CHECK-NODOT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NODOT-NEXT:    sshll v2.4s, v1.4h, #0
 ; CHECK-NODOT-NEXT:    sshll2 v3.4s, v1.8h, #0
 ; CHECK-NODOT-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -133,7 +131,6 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
 ; CHECK-NOI8MM:       // %bb.0:
 ; CHECK-NOI8MM-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NOI8MM-NEXT:    sshll v2.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOI8MM-NEXT:    smull v3.4s, v2.4h, v1.4h
 ; CHECK-NOI8MM-NEXT:    smull2 v4.4s, v2.8h, v1.8h
 ; CHECK-NOI8MM-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
@@ -187,7 +184,6 @@ define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
 ; CHECK-NOI8MM:       // %bb.0:
 ; CHECK-NOI8MM-NEXT:    sshll v1.8h, v1.8b, #0
 ; CHECK-NOI8MM-NEXT:    ushll v2.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOI8MM-NEXT:    smull v3.4s, v2.4h, v1.4h
 ; CHECK-NOI8MM-NEXT:    smull2 v4.4s, v2.8h, v1.8h
 ; CHECK-NOI8MM-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
@@ -386,7 +382,6 @@ define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-NEXT:    bic v2.4h, #255, lsl #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umull v3.4s, v2.4h, v1.4h
 ; CHECK-NEXT:    umlal v0.4s, v2.4h, v1.4h
 ; CHECK-NEXT:    ext v1.16b, v3.16b, v3.16b, #8

diff  --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll
index 7218204ba844ca..55609000f79af8 100644
--- a/llvm/test/CodeGen/AArch64/neon-perm.ll
+++ b/llvm/test/CodeGen/AArch64/neon-perm.ll
@@ -1800,7 +1800,6 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
 ; CHECK-GI-LABEL: test_vzip1_v4i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
  %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i8> %lo
@@ -3997,7 +3996,6 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    xtn v2.8b, v0.8h
 ; CHECK-SD-NEXT:    uzp2 v1.16b, v0.16b, v0.16b
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    fmov d0, d2
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4005,7 +4003,6 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    uzp1 v2.16b, v0.16b, v0.16b
 ; CHECK-GI-NEXT:    uzp2 v1.16b, v0.16b, v0.16b
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    fmov d0, d2
 ; CHECK-GI-NEXT:    ret
 

diff  --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index abdfb996fa1668..e1c7dbc79566c5 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -32,9 +32,8 @@ define <4 x i32> @v4i32(<4 x i32> %a) {
 ; CHECK-GI-LABEL: v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI2_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -61,9 +60,8 @@ define <8 x i16> @v8i16(<8 x i16> %a) {
 ; CHECK-GI-LABEL: v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI4_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -71,23 +69,12 @@ entry:
 }
 
 define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: v8i16_2:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    adrp x8, .LCPI5_0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v8i16_2:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v8i16_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i16> %V128
@@ -113,9 +100,8 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
 ; CHECK-GI-LABEL: v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -123,23 +109,12 @@ entry:
 }
 
 define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: v16i8_2:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v16i8_2:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v16i8_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI8_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x i8> %V128
@@ -175,9 +150,8 @@ define <4 x float> @v4f32(<4 x float> %a) {
 ; CHECK-GI-LABEL: v4f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -204,9 +178,8 @@ define <8 x half> @v8f16(<8 x half> %a) {
 ; CHECK-GI-LABEL: v8f16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/neon-rshrn.ll b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
index e648b10ea357bd..2fe3c092aab26d 100644
--- a/llvm/test/CodeGen/AArch64/neon-rshrn.ll
+++ b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
@@ -850,7 +850,6 @@ entry:
 define void @rshrn_v4i16_5(<4 x i16> %a, ptr %p) {
 ; CHECK-LABEL: rshrn_v4i16_5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn v0.8b, v0.8h, #5
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret
@@ -884,7 +883,6 @@ entry:
 define void @rshrn_v1i64_5(<1 x i64> %a, ptr %p) {
 ; CHECK-LABEL: rshrn_v1i64_5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn v0.2s, v0.2d, #5
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
index 544d7680f01b80..2d28d80c353346 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -51,7 +51,6 @@ define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
 define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -62,7 +61,6 @@ define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
 define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_0_swap:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s2, s1, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -73,7 +71,6 @@ define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -117,7 +114,6 @@ define float @test_fmla_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
 define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_3_ext0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla s0, s1, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -129,7 +125,6 @@ define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
 define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_3_ext0_swp:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla s0, s1, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -141,8 +136,6 @@ define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
 define float @test_fmla_ss2S_0_ext0(float %a, <2 x float> %v, <2 x float> %w) {
 ; CHECK-LABEL: test_fmla_ss2S_0_ext0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -295,7 +288,6 @@ define float @test_fmls_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
 define float @test_fmls_ss2S_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -308,7 +300,6 @@ entry:
 define float @test_fmls_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_0_swap:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -321,7 +312,6 @@ entry:
 define float @test_fmls_ss2S_1(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov s1, v2.s[1]
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
@@ -494,7 +484,6 @@ define float @test_fmla_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #
 define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_0_strict:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -505,7 +494,6 @@ define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_0_swap_strict:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s2, s1, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -516,7 +504,6 @@ define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #
 define float @test_fmla_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_1_strict:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -635,7 +622,6 @@ define float @test_fmls_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #
 define float @test_fmls_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_0_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -648,7 +634,6 @@ entry:
 define float @test_fmls_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_0_swap_strict:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -661,7 +646,6 @@ entry:
 define float @test_fmls_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_1_strict:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov s1, v2.s[1]
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
index e59d9098a30d65..941c8b57fe42cb 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
@@ -6,7 +6,6 @@
 define void @histogram_i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask) {
 ; CHECK-LABEL: histogram_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    tbnz w8, #0, .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %else
@@ -36,7 +35,6 @@ define void @histogram_i32_literal(ptr %base, <4 x i32> %indices, <4 x i1> %mask
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    dup v2.2d, x0
 ; CHECK-NEXT:    sshll v3.2d, v0.2s, #2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    add v3.2d, v2.2d, v3.2d
 ; CHECK-NEXT:    tbz w8, #0, .LBB1_2

diff  --git a/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll b/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
index 64ba7b2b4ee942..9e804389432299 100644
--- a/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
@@ -67,7 +67,6 @@ define <8 x i8> @shuffle8_with_and_mask(<8 x i8> %src, <8 x i8> %mask) {
 ; CHECK-LABEL: shuffle8_with_and_mask:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.8b v2, #7
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    and.8b v1, v1, v2
 ; CHECK-NEXT:    tbl.8b v0, { v0 }, v1
 ; CHECK-NEXT:    ret
@@ -114,7 +113,6 @@ define <8 x i8> @shuffle8_with_and_mask_
diff erent_constants(<8 x i8> %src, <8 x
 ; CHECK-LABEL: shuffle8_with_and_mask_
diff erent_constants:
 ; CHECK:        // %bb.0:
 ; CHECK-NEXT:   adrp	x8, .LCPI2_0
-; CHECK-NEXT:   // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:   ldr	d2, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:   and.8b	v1, v1, v2
 ; CHECK-NEXT:   tbl.8b	v0, { v0 }, v1
@@ -151,7 +149,6 @@ define <8 x i8> @shuffle8_with_and_mask_
diff erent_constants(<8 x i8> %src, <8 x
 define <8 x i8> @shuffle8_with_mask(<8 x i8> %src, <8 x i8> %mask) {
 ; CHECK-LABEL: shuffle8_with_mask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:  // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:  tbl.8b v0, { v0 }, v1
 ; CHECK-NEXT:  ret
 

diff  --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
index 5d78ad24eb333d..96806f3a72a54c 100644
--- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll
+++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
@@ -41,7 +41,6 @@ define void @v8i64_v8i32(<8 x i64> %a, ptr %result) {
 define void @v2i32_v2i16(<2 x i32> %a, ptr %result) {
 ; CHECK-LABEL: v2i32_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w9, [x0]
@@ -89,7 +88,6 @@ define void @v16i32_v16i16(<16 x i32> %a, ptr %result) {
 define void @v2i32_v2i8(<2 x i32> %a, ptr %result) {
 ; CHECK-LABEL: v2i32_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w9, [x0]
@@ -156,7 +154,6 @@ define void @v32i32_v32i8(<32 x i32> %a, ptr %result) {
 define void @v2i16_v2i8(<2 x i16> %a, ptr %result) {
 ; CHECK-LABEL: v2i16_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w9, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/neon-vcmla.ll b/llvm/test/CodeGen/AArch64/neon-vcmla.ll
index a648256f95a8d4..1f9e3ab6155ac3 100644
--- a/llvm/test/CodeGen/AArch64/neon-vcmla.ll
+++ b/llvm/test/CodeGen/AArch64/neon-vcmla.ll
@@ -14,7 +14,6 @@ entry:
 define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: test_16x4_lane_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[1], #0
 ; CHECK-NEXT:    ret
 entry:
@@ -38,7 +37,6 @@ entry:
 define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: test_rot90_16x4_lane_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #90
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
index 8f05fd6cb76bd9..b0f8036879e893 100644
--- a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
+++ b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
@@ -4,7 +4,6 @@
 define <4 x i16> @shuffle1(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -15,7 +14,6 @@ entry:
 define <4 x i16> @shuffle2(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -73,7 +71,6 @@ entry:
 define <8 x i8> @shuffle7(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -85,7 +82,6 @@ entry:
 define <8 x i8> @shuffle8(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -99,7 +95,6 @@ define <8 x i8> @shuffle_not1(<16 x i8> %v) {
 ; CHECK-LABEL: shuffle_not1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %res
@@ -131,9 +126,8 @@ entry:
 define <8 x i8> @shuffle_not4(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle_not4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
index afcced5dcb9ab5..d315c306aa37a0 100644
--- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
@@ -137,9 +137,7 @@ define <8 x i16> @shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_faili1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI12_0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -152,9 +150,7 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_fail2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI13_0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -167,9 +163,7 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_fail3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
index ed84ec2ba9eaeb..1b55e733dfc831 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -545,10 +545,6 @@ define <5 x double> @test_ldnp_v5f64(ptr %A) {
 ; CHECK-NEXT:    ldr d4, [x0, #32]
 ; CHECK-NEXT:    ext.16b v1, v0, v0, #8
 ; CHECK-NEXT:    ext.16b v3, v2, v2, #8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q3
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test_ldnp_v5f64:
@@ -557,13 +553,8 @@ define <5 x double> @test_ldnp_v5f64(ptr %A) {
 ; CHECK-BE-NEXT:    ld1 { v0.2d }, [x0]
 ; CHECK-BE-NEXT:    ldr d4, [x0, #32]
 ; CHECK-BE-NEXT:    ld1 { v2.2d }, [x8]
-; CHECK-BE-NEXT:    // kill: def $d4 killed $d4 killed $q4
 ; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BE-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-BE-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-BE-NEXT:    // kill: def $d3 killed $d3 killed $q3
 ; CHECK-BE-NEXT:    ret
   %lv = load<5 x double>, ptr %A, align 8, !nontemporal !0
   ret <5 x double> %lv

diff  --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index f8ba150a0405ff..06ee0643c4016f 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -64,7 +64,6 @@ define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 {
 define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2i32:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -80,7 +79,6 @@ define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
 define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v4i16:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -96,7 +94,6 @@ define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
 define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v8i8:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -142,7 +139,6 @@ define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 {
 define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -158,7 +154,6 @@ define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v1f64:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -174,7 +169,6 @@ define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
 define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v1i64:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -238,7 +232,6 @@ define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 {
 define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #8]
 ; CHECK-LE-NEXT:    ret
@@ -255,7 +248,6 @@ define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-8]
 ; CHECK-LE-NEXT:    ret
@@ -426,7 +418,6 @@ define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
 define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    add x8, x0, #256
 ; CHECK-LE-NEXT:    stnp s0, s1, [x8]
@@ -444,7 +435,6 @@ define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_252:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #252]
 ; CHECK-LE-NEXT:    ret
@@ -461,7 +451,6 @@ define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    sub x8, x0, #260
 ; CHECK-LE-NEXT:    stnp s0, s1, [x8]
@@ -480,7 +469,6 @@ define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256:
 ; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-256]
 ; CHECK-LE-NEXT:    ret
@@ -641,17 +629,9 @@ entry:
 define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
 ; CHECK-LE-LABEL: test_stnp_v17f32:
 ; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-LE-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-LE-NEXT:    ldr s16, [sp, #32]
-; CHECK-LE-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-LE-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-LE-NEXT:    add x8, sp, #40
-; CHECK-LE-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-LE-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-LE-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-LE-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-LE-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-LE-NEXT:    add x8, sp, #40
 ; CHECK-LE-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-LE-NEXT:    ldr s5, [sp]
 ; CHECK-LE-NEXT:    ld1 { v16.s }[1], [x8]
@@ -683,19 +663,11 @@ define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v17f32:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-BE-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-BE-NEXT:    ldr s16, [sp, #36]
-; CHECK-BE-NEXT:    // kill: def $s5 killed $s5 def $q5
-; CHECK-BE-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-BE-NEXT:    ldr s17, [sp, #4]
 ; CHECK-BE-NEXT:    add x8, sp, #44
 ; CHECK-BE-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-BE-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-BE-NEXT:    // kill: def $s6 killed $s6 def $q6
-; CHECK-BE-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-BE-NEXT:    // kill: def $s7 killed $s7 def $q7
-; CHECK-BE-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-BE-NEXT:    ldr s1, [sp, #68]
 ; CHECK-BE-NEXT:    ld1 { v16.s }[1], [x8]
 ; CHECK-BE-NEXT:    add x8, sp, #12

diff  --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll
index 55942d0e421bb9..b215a222b7fc9d 100644
--- a/llvm/test/CodeGen/AArch64/phi.ll
+++ b/llvm/test/CodeGen/AArch64/phi.ll
@@ -302,8 +302,6 @@ define <2 x i8> @tv2i8(i1 %c, ptr %p, <2 x i8> %a, <2 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: tv2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB10_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
@@ -319,7 +317,6 @@ define <2 x i8> @tv2i8(i1 %c, ptr %p, <2 x i8> %a, <2 x i8> %b) {
 ; CHECK-GI-NEXT:    umov w9, v1.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -392,12 +389,10 @@ define <4 x i8> @tv4i8(i1 %c, ptr %p, <4 x i8> %a, <4 x i8> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB12_2:
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -502,12 +497,10 @@ define <2 x i16> @tv2i16(i1 %c, ptr %p, <2 x i16> %a, <2 x i16> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB16_2:
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -746,8 +739,6 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
 ;
 ; CHECK-GI-LABEL: tv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB26_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    fmov d6, d0
@@ -755,18 +746,14 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    mov v6.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.16b, v7.16b
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v6.d[1]
 ; CHECK-GI-NEXT:    mov v0.16b, v6.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB26_2:
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    fmov d2, d5
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -1208,8 +1195,6 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
 ;
 ; CHECK-GI-LABEL: tv3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB42_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    fmov d6, d0
@@ -1217,18 +1202,14 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    mov v6.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.16b, v7.16b
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v6.d[1]
 ; CHECK-GI-NEXT:    mov v0.16b, v6.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB42_2:
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    fmov d2, d5
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e

diff  --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index f9f1cd4b1fcf76..86a48e956a7502 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -126,7 +126,6 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
 ; GISEL-NEXT:    mov w8, v0.s[0]
 ; GISEL-NEXT:    fmov w9, s1
 ; GISEL-NEXT:    add x0, x8, w9, uxtw
-; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; GISEL-NEXT:    ret
 ;
 ; GISELO0-LABEL: popcount256:

diff  --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll
index 495541e9fd949d..f670d3f171ec7d 100644
--- a/llvm/test/CodeGen/AArch64/pow.ll
+++ b/llvm/test/CodeGen/AArch64/pow.ll
@@ -75,13 +75,10 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    fmov s1, #0.25000000
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    fmov s1, #0.25000000
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -89,7 +86,6 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[3]
@@ -97,7 +93,6 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    fmov s1, #0.25000000
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -117,13 +112,10 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    fmov d1, #0.25000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    add sp, sp, #48

diff  --git a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
index ea9588e9e3db7d..b169a709862e23 100644
--- a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
+++ b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
@@ -12,7 +12,6 @@ define linkonce_odr void @_ZN1y2beEPiRK1vPmPS1_(<8 x i8> %0, ptr %agg.tmp.i) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    orr x9, x8, #0xf
 ; CHECK-NEXT:    orr x10, x8, #0xe

diff  --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll
index f7efab1ff66b33..725d2ddaae6ca9 100644
--- a/llvm/test/CodeGen/AArch64/pr58350.ll
+++ b/llvm/test/CodeGen/AArch64/pr58350.ll
@@ -12,7 +12,6 @@ define void @f(<1 x float> %a, i64 %b) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    bfi x9, x0, #2, #1
 ; CHECK-NEXT:    str d1, [sp]

diff  --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index 88bab4af95d64f..cacdc7ffcc04cf 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -8,7 +8,6 @@ define i32 @f(i64 %0) {
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    udiv x10, x9, x8
 ; CHECK-NEXT:    msub x0, x10, x8, x9
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = trunc i64 %0 to i32
   %3 = freeze i32 %2

diff  --git a/llvm/test/CodeGen/AArch64/pr61111.ll b/llvm/test/CodeGen/AArch64/pr61111.ll
index d2c72921a92299..fe8940e860df4a 100644
--- a/llvm/test/CodeGen/AArch64/pr61111.ll
+++ b/llvm/test/CodeGen/AArch64/pr61111.ll
@@ -4,7 +4,6 @@
 define i62 @f(i1 %0) {
 ; CHECK-LABEL: f:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    tst x8, #0x3fffffffffffffff

diff  --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
index 83dd240a6540fa..993c3eb810b1e8 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -5,11 +5,10 @@ define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5,
 ; CHECK-LABEL: callee:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    mov x0, x5
 ; CHECK-NEXT:    add x8, sp, #24
+; CHECK-NEXT:    mov x0, x5
 ; CHECK-NEXT:    stp x6, x7, [sp, #32]
 ; CHECK-NEXT:    str x5, [sp, #24]
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    str x8, [sp, #8]
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/ptradd.ll b/llvm/test/CodeGen/AArch64/ptradd.ll
index 427542377bfdae..0e6b38c86323e2 100644
--- a/llvm/test/CodeGen/AArch64/ptradd.ll
+++ b/llvm/test/CodeGen/AArch64/ptradd.ll
@@ -74,17 +74,11 @@ entry:
 define <3 x ptr> @vector_gep_v3i32(<3 x ptr> %b, <3 x i32> %off) {
 ; CHECK-SD-LABEL: vector_gep_v3i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    saddw v2.2d, v2.2d, v4.2s
+; CHECK-SD-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
 ; CHECK-SD-NEXT:    saddw v0.2d, v0.2d, v3.2s
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    saddw v2.2d, v2.2d, v4.2s
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i32:
@@ -165,10 +159,8 @@ define <3 x ptr> @vector_gep_v3i64(<3 x ptr> %b, <3 x i64> %off) {
 ; CHECK-GI-LABEL: vector_gep_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
 ; CHECK-GI-NEXT:    fmov x8, d1
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
@@ -252,28 +244,22 @@ entry:
 define <3 x ptr> @vector_gep_v3i64_base(ptr %b, <3 x i64> %off) {
 ; CHECK-SD-LABEL: vector_gep_v3i64_base:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    fmov d3, x0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    dup v1.2d, x0
+; CHECK-SD-NEXT:    fmov d3, x0
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    add v0.2d, v1.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_base:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v1.2d, x0
+; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    add x8, x0, x8
-; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -343,8 +329,6 @@ define <3 x ptr> @vector_gep_v3i64_c10(ptr %b) {
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_c10:
@@ -435,8 +419,6 @@ define <3 x ptr> @vector_gep_v3i64_cm10(ptr %b) {
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_cm10:

diff  --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 2685ea9fb5d202..04b9d6187a70df 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -210,7 +210,6 @@ entry:
 define <16 x i8> @signed_minmax_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: signed_minmax_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -224,7 +223,6 @@ entry:
 define <8 x i16> @signed_minmax_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: signed_minmax_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -238,7 +236,6 @@ entry:
 define <4 x i32> @signed_minmax_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_minmax_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -254,7 +251,6 @@ entry:
 define <16 x i8> @signed_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: signed_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -268,7 +264,6 @@ entry:
 define <8 x i16> @signed_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: signed_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -282,7 +277,6 @@ entry:
 define <4 x i32> @signed_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -298,7 +292,6 @@ entry:
 define <16 x i8> @unsigned_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: unsigned_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -311,7 +304,6 @@ entry:
 define <8 x i16> @unsigned_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: unsigned_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -324,7 +316,6 @@ entry:
 define <4 x i32> @unsigned_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: unsigned_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -339,7 +330,6 @@ entry:
 define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -353,7 +343,6 @@ entry:
 define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -367,7 +356,6 @@ entry:
 define <4 x i32> @us_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: us_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -383,7 +371,6 @@ entry:
 define <16 x i8> @sminsmax_range_unsigned_i16_to_i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i16_to_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -397,7 +384,6 @@ entry:
 define <8 x i16> @sminsmax_range_unsigned_i32_to_i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i32_to_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -411,7 +397,6 @@ entry:
 define <4 x i32> @sminsmax_range_unsigned_i64_to_i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/rcpc3.ll b/llvm/test/CodeGen/AArch64/rcpc3.ll
index 45770337c13208..26e7b9325846d7 100644
--- a/llvm/test/CodeGen/AArch64/rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/rcpc3.ll
@@ -77,7 +77,6 @@ define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %
 ; RCPC3-LABEL: test_ldap1_1xi64_lane0:
 ; RCPC3:       // %bb.0:
 ; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
-; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
@@ -95,7 +94,6 @@ define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr noc
 ; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
 ; RCPC3:       // %bb.0:
 ; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
-; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
@@ -181,13 +179,11 @@ define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1
 ;
 ; RCPC3-LABEL: test_stl1_1xi64_lane0:
 ; RCPC3:       // %bb.0:
-; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
 ; NO-RCPC3:       // %bb.0:
-; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-RCPC3-NEXT:    fmov x8, d0
 ; NO-RCPC3-NEXT:    stlr x8, [x0]
 ; NO-RCPC3-NEXT:    ret
@@ -200,7 +196,6 @@ define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a,
 ;
 ; RCPC3-LABEL: test_stl1_1xdouble_lane0:
 ; RCPC3:       // %bb.0:
-; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
 ; RCPC3-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index 8ca521327c2e31..ebfcfd4ea0a398 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -29,7 +29,6 @@ define i1 @test_redand_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v2i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    and w8, w9, w8
@@ -51,7 +50,6 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -77,7 +75,6 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v8i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -199,13 +196,11 @@ define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redand_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redand_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redand_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
@@ -245,7 +240,6 @@ define i8 @test_redand_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -270,7 +264,6 @@ define i8 @test_redand_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redand_v8i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -376,7 +369,6 @@ define i16 @test_redand_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i16:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -455,7 +447,6 @@ define i32 @test_redand_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redand_v2i32:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    and w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index aac31ce8b71b75..2c2577f8916371 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -29,7 +29,6 @@ define i1 @test_redor_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v2i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    orr w8, w9, w8
@@ -51,7 +50,6 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -77,7 +75,6 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v8i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -199,13 +196,11 @@ define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redor_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redor_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a)
@@ -247,7 +242,6 @@ define i8 @test_redor_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -272,7 +266,6 @@ define i8 @test_redor_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redor_v8i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -378,7 +371,6 @@ define i16 @test_redor_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i16:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -457,7 +449,6 @@ define i32 @test_redor_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redor_v2i32:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    orr w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
index 325ab444205bf9..05867346372d38 100644
--- a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
@@ -4,8 +4,6 @@
 define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]
@@ -227,9 +225,7 @@ entry:
 define i32 @v2(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
-; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d4, [x0]
 ; CHECK-NEXT:    ldr d5, [x2]
@@ -443,8 +439,6 @@ entry:
 define i32 @v3(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
index 9a00172f947636..7e55f07cc6b7af 100644
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -26,7 +26,6 @@ define i1 @test_redxor_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v2i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    eor w8, w9, w8
@@ -46,7 +45,6 @@ define i1 @test_redxor_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -70,7 +68,6 @@ define i1 @test_redxor_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v8i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -188,13 +185,11 @@ define <16 x i1> @test_redxor_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redxor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redxor_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redxor_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %a)
@@ -236,7 +231,6 @@ define i8 @test_redxor_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -261,7 +255,6 @@ define i8 @test_redxor_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v8i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -367,7 +360,6 @@ define i16 @test_redxor_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i16:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -446,7 +438,6 @@ define i32 @test_redxor_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v2i32:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    eor w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
index 9bd3ad9165cee6..96404961cfc57f 100644
--- a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
+++ b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
@@ -275,7 +275,7 @@ body:             |
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   dead $w1 = MOVi32imm 526, implicit-def $x1
   ; CHECK-NEXT:   dead $w2 = MOVi32imm 2, implicit-def $x2
-  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 2, implicit-def $x19
+  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 2
   ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 4, @bar, undef $x0, $x1, $x2, undef $x3, 2, 0, 2, 4, 2, 39, 2, 0, 2, 1, 2, 0, 2, 42, 2, 2, 2, 14, 2, 0, 2, 3, 2, 400, 2, 3, 2, 400, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, 2, 95, 2, 7, 2, 0, 2, 3, 2, -11, 2, 3, 2, -8280, 2, 3, 2, 45, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_aapcs, implicit-def $sp, implicit-def $x0, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x20 = COPY $x0
@@ -305,7 +305,7 @@ body:             |
   ; CHECK-NEXT:   renamable $x20 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   renamable $w21 = MOVi32imm -8280
   ; CHECK-NEXT:   renamable $w23 = MOVi32imm -6
-  ; CHECK-NEXT:   renamable $w25 = MOVi32imm 3, implicit-def $x25
+  ; CHECK-NEXT:   renamable $w25 = MOVi32imm 3
   ; CHECK-NEXT:   renamable $w24 = MOVi32imm 2143289344
   ; CHECK-NEXT:   renamable $x22 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $x8 = IMPLICIT_DEF
@@ -316,21 +316,21 @@ body:             |
   ; CHECK-NEXT:   KILL killed renamable $x8
   ; CHECK-NEXT:   renamable $x8 = IMPLICIT_DEF
   ; CHECK-NEXT:   KILL killed renamable $x8
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
   ; CHECK-NEXT:   B %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.bb23:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT:   liveins: $x19
+  ; CHECK-NEXT:   liveins: $x19:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $w20 = MOVi32imm 95
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 117, 2, 2, 2, 14, 2, 0, 2, 3, 2, 3, 2, 3, 2, 109, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w20, 2, 3, renamable $w19, 2, 3, 2, 3, 2, 3, 2, -8280, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 2, 1, 8, %stack.0, 0, 2, 4278124286, 2, 0, 2, 2, 0, 0, 1, 1, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x19 :: (load store (s64) on %stack.0)
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 117, 2, 2, 2, 14, 2, 0, 2, 3, 2, 3, 2, 3, 2, 109, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w20, 2, 3, killed renamable $w19, 2, 3, 2, 3, 2, 3, 2, -8280, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 2, 1, 8, %stack.0, 0, 2, 4278124286, 2, 0, 2, 2, 0, 0, 1, 1, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.bb27:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000), %bb.13(0x00000000)
-  ; CHECK-NEXT:   liveins: $w21, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $w21, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $fp = nuw nsw ADDXri renamable $x25, 1, 0
   ; CHECK-NEXT:   CBNZW $wzr, %bb.13
@@ -338,26 +338,26 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.bb41:
   ; CHECK-NEXT:   successors: %bb.5(0x7ffff777), %bb.6(0x00000889)
-  ; CHECK-NEXT:   liveins: $fp, $w21, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w21, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBZW $wzr, %bb.6
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   B %bb.7
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.6.bb42:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w21, $w23, $w24, $x20, $x25, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w21, $w23, $w24, $x20:0x0000000000000040, $x25:0x00000000000000C0, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
-  ; CHECK-NEXT:   renamable $w22 = MOVi32imm 2, implicit-def $x22
+  ; CHECK-NEXT:   renamable $w22 = MOVi32imm 2
   ; CHECK-NEXT:   renamable $w26 = MOVi32imm 95
-  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 125, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w26, 2, 3, renamable $w22, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x22 :: (load store (s64) on %stack.1)
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
+  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 125, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w26, 2, 3, killed renamable $w22, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.1)
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = COPY $xzr
   ; CHECK-NEXT:   renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
@@ -369,18 +369,18 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.7.bb48:
   ; CHECK-NEXT:   successors: %bb.8(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.8.bb79:
   ; CHECK-NEXT:   successors: %bb.9(0x04000000), %bb.8(0x7c000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBNZW $wzr, %bb.8
   ; CHECK-NEXT:   B %bb.9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9.bb81:
   ; CHECK-NEXT:   successors: %bb.11(0x78787f1d), %bb.10(0x078780e3)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRXui $xzr, renamable $x22, 0 :: (store unordered (s64), addrspace 1)
   ; CHECK-NEXT:   CBNZW $wzr, %bb.11
@@ -388,28 +388,28 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.10.bb82:
   ; CHECK-NEXT:   successors: %bb.11(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   BL @blam.1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.11.bb83:
   ; CHECK-NEXT:   successors: %bb.12(0x7ffff777), %bb.17(0x00000889)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBZW $wzr, %bb.17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.12:
   ; CHECK-NEXT:   successors: %bb.18(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   B %bb.18
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.13.bb85:
   ; CHECK-NEXT:   successors: %bb.14(0x40000000), %bb.15(0x40000000)
-  ; CHECK-NEXT:   liveins: $fp, $w21, $x10
+  ; CHECK-NEXT:   liveins: $fp:0x0000000000000040, $w21, $x10:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBNZW $wzr, %bb.15
   ; CHECK-NEXT:   B %bb.14
@@ -424,12 +424,12 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.15.bb90:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT:   liveins: $fp, $w21, $x10
+  ; CHECK-NEXT:   liveins: $fp:0x0000000000000040, $w21, $x10:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   $w0 = MOVi32imm 12
   ; CHECK-NEXT:   renamable $w8 = MOVi32imm 95
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 43, 2, 0, 2, 1, 2, 0, 2, 201, 2, 4, 2, 14, 2, 0, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w8, 2, 3, renamable $w10, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x10, implicit killed $fp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1)
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 43, 2, 0, 2, 1, 2, 0, 2, 201, 2, 4, 2, 14, 2, 0, 2, 0, 2, 4278124286, 2, 3, killed renamable $w29, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w8, 2, 3, killed renamable $w10, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0), (load store (s64) on %stack.1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.16.bb94:
@@ -442,14 +442,14 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.17.bb96:
   ; CHECK-NEXT:   successors: %bb.18(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x20, $x25, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x20:0x0000000000000040, $x25:0x00000000000000C0, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
-  ; CHECK-NEXT:   renamable $w21 = MOVi32imm 2, implicit-def $x21
+  ; CHECK-NEXT:   renamable $w21 = MOVi32imm 2
   ; CHECK-NEXT:   renamable $w22 = MOVi32imm 95
-  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 250, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w22, 2, 3, renamable $w21, 2, 3, 2, 4278124286, 2, 3, renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x21 :: (load store (s64) on %stack.1), (load store (s64) on %stack.2)
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
+  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 250, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w22, 2, 3, killed renamable $w21, 2, 3, 2, 4278124286, 2, 3, renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.1), (load store (s64) on %stack.2)
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
   ; CHECK-NEXT:   dead renamable $x8 = nuw ADDXri killed renamable $x8, 24, 0
@@ -460,7 +460,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.18.bb105:
   ; CHECK-NEXT:   successors: %bb.20(0x00000000), %bb.19(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRWui renamable $w20, renamable $x19, 0 :: (store unordered (s32), addrspace 1)
   ; CHECK-NEXT:   STRWui renamable $w24, renamable $x26, 0 :: (store unordered (s32), align 8, addrspace 1)
@@ -469,7 +469,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.19.bb115:
   ; CHECK-NEXT:   successors: %bb.16(0x00000000), %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = COPY $xzr
   ; CHECK-NEXT:   renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1)
@@ -483,11 +483,11 @@ body:             |
   ; CHECK-NEXT:   B %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.20.bb120:
-  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT:   liveins: $x10:0x0000000000000040
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   $w0 = MOVi32imm 10
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 272, 2, 2, 2, 14, 2, 0, 2, 0, 2, 0, 2, 3, renamable $w10, 2, 0, 2, 0, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 0, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x10
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 272, 2, 2, 2, 14, 2, 0, 2, 0, 2, 0, 2, 3, killed renamable $w10, 2, 0, 2, 0, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 0, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   bb.0.bb:
     successors: %bb.1(0x80000000), %bb.2(0x00000000)

diff  --git a/llvm/test/CodeGen/AArch64/rem.ll b/llvm/test/CodeGen/AArch64/rem.ll
index d807635f5d87d1..d03ea0d62bf2c8 100644
--- a/llvm/test/CodeGen/AArch64/rem.ll
+++ b/llvm/test/CodeGen/AArch64/rem.ll
@@ -175,7 +175,6 @@ define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i8:
@@ -193,7 +192,6 @@ define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i8> %d, %e
@@ -264,7 +262,6 @@ define <4 x i8> @sv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8:
@@ -302,8 +299,6 @@ entry:
 define <8 x i8> @sv8i8(<8 x i8> %d, <8 x i8> %e) {
 ; CHECK-SD-LABEL: sv8i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.b[0]
 ; CHECK-SD-NEXT:    smov w12, v0.b[0]
 ; CHECK-SD-NEXT:    smov w8, v1.b[1]
@@ -1095,7 +1090,6 @@ define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i8:
@@ -1112,7 +1106,6 @@ define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i8> %d, %e
@@ -1159,8 +1152,6 @@ entry:
 define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
@@ -1183,7 +1174,6 @@ define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8:
@@ -1220,8 +1210,6 @@ entry:
 define <8 x i8> @uv8i8(<8 x i8> %d, <8 x i8> %e) {
 ; CHECK-SD-LABEL: uv8i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.b[0]
 ; CHECK-SD-NEXT:    umov w12, v0.b[0]
 ; CHECK-SD-NEXT:    umov w8, v1.b[1]
@@ -2014,7 +2002,6 @@ define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i16:
@@ -2032,7 +2019,6 @@ define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i16> %d, %e
@@ -2042,8 +2028,6 @@ entry:
 define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-LABEL: sv3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.h[0]
 ; CHECK-SD-NEXT:    smov w12, v0.h[0]
 ; CHECK-SD-NEXT:    smov w8, v1.h[1]
@@ -2059,13 +2043,10 @@ define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w8
 ; CHECK-SD-NEXT:    msub w8, w16, w14, w15
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    smov w8, v0.h[0]
 ; CHECK-GI-NEXT:    smov w9, v1.h[0]
 ; CHECK-GI-NEXT:    smov w11, v0.h[1]
@@ -2081,7 +2062,6 @@ define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    msub w8, w16, w15, w14
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <3 x i16> %d, %e
@@ -2091,8 +2071,6 @@ entry:
 define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-LABEL: sv4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.h[0]
 ; CHECK-SD-NEXT:    smov w12, v0.h[0]
 ; CHECK-SD-NEXT:    smov w8, v1.h[1]
@@ -2113,7 +2091,6 @@ define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i16:
@@ -2478,7 +2455,6 @@ define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i16:
@@ -2495,7 +2471,6 @@ define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i16> %d, %e
@@ -2505,8 +2480,6 @@ entry:
 define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-LABEL: uv3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
 ; CHECK-SD-NEXT:    umov w12, v0.h[0]
 ; CHECK-SD-NEXT:    umov w8, v1.h[1]
@@ -2527,13 +2500,10 @@ define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-NEXT:    msub w10, w15, w12, w13
 ; CHECK-SD-NEXT:    sxth w8, w10
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w11, v0.h[1]
@@ -2549,7 +2519,6 @@ define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    msub w8, w16, w15, w14
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <3 x i16> %d, %e
@@ -2559,8 +2528,6 @@ entry:
 define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-LABEL: uv4i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
 ; CHECK-SD-NEXT:    umov w12, v0.h[0]
 ; CHECK-SD-NEXT:    umov w8, v1.h[1]
@@ -2581,7 +2548,6 @@ define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i16:
@@ -2933,8 +2899,6 @@ entry:
 define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-LABEL: sv2i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s1
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    mov w11, v1.s[1]
@@ -2945,13 +2909,10 @@ define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    mov w10, v1.s[1]
@@ -2961,7 +2922,6 @@ define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i32> %d, %e
@@ -3164,8 +3124,6 @@ entry:
 define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-LABEL: uv2i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s1
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    mov w11, v1.s[1]
@@ -3176,13 +3134,10 @@ define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    mov w10, v1.s[1]
@@ -3192,7 +3147,6 @@ define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i32> %d, %e
@@ -3433,12 +3387,6 @@ entry:
 define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: sv3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
 ; CHECK-SD-NEXT:    fmov x11, d4
@@ -3458,12 +3406,8 @@ define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: sv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    fmov x9, d3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x10, d4
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
@@ -3486,7 +3430,6 @@ define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    msub x8, x12, x9, x8
 ; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -3598,12 +3541,6 @@ entry:
 define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: uv3i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
 ; CHECK-SD-NEXT:    fmov x11, d4
@@ -3623,12 +3560,8 @@ define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: uv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    fmov x9, d3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x10, d4
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
@@ -3651,7 +3584,6 @@ define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    msub x8, x12, x9, x8
 ; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
index ec7548e1e65410..d4060f9083e97e 100644
--- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
@@ -7,7 +7,6 @@
 define i32 @testmswbf(bfloat %a) {
 ; CHECK-LABEL: testmswbf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -31,7 +30,6 @@ entry:
 define i64 @testmsxbf(bfloat %a) {
 ; CHECK-LABEL: testmsxbf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -141,7 +139,6 @@ entry:
 define i32 @testpswbf(bfloat %a) {
 ; CHECK-LABEL: testpswbf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -165,7 +162,6 @@ entry:
 define i64 @testpsxbf(bfloat %a) {
 ; CHECK-LABEL: testpsxbf:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16

diff  --git a/llvm/test/CodeGen/AArch64/select-constant-xor.ll b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
index 3adf48e84b44c1..89409d6a32097a 100644
--- a/llvm/test/CodeGen/AArch64/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
@@ -38,7 +38,6 @@ define i32 @selecti64i32(i64 %a) {
 define i64 @selecti32i64(i32 %a) {
 ; CHECK-LABEL: selecti32i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #31, #1
 ; CHECK-NEXT:    eor x0, x8, #0x7fffffff
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir
index 0e6c94c44712c7..b29ab7727f65d6 100644
--- a/llvm/test/CodeGen/AArch64/seqpairspill.mir
+++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir
@@ -7,11 +7,11 @@ body: |
   bb.0:
     ; Check the spill/reload sequence for the %0 register
     ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX
-    ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8)
+    ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8)
     ; CHECK: INLINEASM
-    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8)
+    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8)
     ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
-    ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
+    ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
     %0 : xseqpairsclass = IMPLICIT_DEF
     %1 : xseqpairsclass = IMPLICIT_DEF
     %2 : gpr64common = IMPLICIT_DEF
@@ -27,11 +27,11 @@ body: |
   bb.0:
     ; Check the spill/reload sequence for the %0 register
     ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW
-    ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4)
+    ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4)
     ; CHECK: INLINEASM
-    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4)
+    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4)
     ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
-    ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
+    ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
     %0 : wseqpairsclass = IMPLICIT_DEF
     %1 : wseqpairsclass = IMPLICIT_DEF
     %2 : gpr64common = IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
index 4dd2a896cd81cb..84fb127239bf85 100644
--- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
@@ -61,7 +61,6 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    clz x8, x0
 ; CHECK-SD-NEXT:    lsr x0, x8, #6
-; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
@@ -90,7 +89,6 @@ define i32 @PR17487(i1 %tobool) {
 ;
 ; CHECK-GI-LABEL: PR17487:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    mov v0.d[1], x0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]

diff  --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 53fbb351954fcf..d7f02e667bb6ef 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -25,7 +25,6 @@ entry:
 define i64 @sext_i8_to_i64(i8 %a) {
 ; CHECK-LABEL: sext_i8_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtb x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -56,7 +55,6 @@ entry:
 define i64 @sext_i16_to_i64(i16 %a) {
 ; CHECK-LABEL: sext_i16_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxth x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -67,7 +65,6 @@ entry:
 define i64 @sext_i32_to_i64(i32 %a) {
 ; CHECK-LABEL: sext_i32_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -98,7 +95,6 @@ entry:
 define i64 @sext_i10_to_i64(i10 %a) {
 ; CHECK-LABEL: sext_i10_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x0, x0, #0, #10
 ; CHECK-NEXT:    ret
 entry:
@@ -228,7 +224,6 @@ define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i8> %a to <3 x i16>
@@ -271,18 +266,12 @@ define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #56
 ; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #56
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #56
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i8_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    sxtb x8, w0
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    sxtb x10, w2
@@ -303,7 +292,6 @@ define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    smov w8, v0.h[0]
 ; CHECK-GI-NEXT:    smov w9, v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
@@ -323,15 +311,11 @@ define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    sshll v2.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    sshll v0.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    sshll2 v2.2d, v2.4s, #0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    smov x8, v0.h[0]
 ; CHECK-GI-NEXT:    smov x9, v0.h[1]
 ; CHECK-GI-NEXT:    smov x10, v0.h[2]
@@ -349,10 +333,8 @@ define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    sshll v3.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshll2 v2.2d, v0.4s, #0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i32_v3i64:
@@ -390,7 +372,6 @@ define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-GI-NEXT:    sbfx w8, w8, #6, #10
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i10> %a to <3 x i16>
@@ -433,18 +414,12 @@ define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #54
 ; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #54
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #54
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i10_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    sbfx x8, x0, #0, #10
 ; CHECK-GI-NEXT:    sbfx x9, x1, #0, #10
 ; CHECK-GI-NEXT:    sbfx x10, x2, #0, #10

diff  --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index d4f3e80e96dd19..9ac579c0fa6a91 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -1194,7 +1194,6 @@ define i32 @t(i64 %x) {
 ; CHECK-LABEL: t:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ubfx x0, x0, #17, #28
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %s = lshr i64 %x, 13
   %t = trunc i64 %s to i32

diff  --git a/llvm/test/CodeGen/AArch64/shift-by-signext.ll b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
index 67e2da96084efb..3bcaee523f2e97 100644
--- a/llvm/test/CodeGen/AArch64/shift-by-signext.ll
+++ b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
@@ -81,7 +81,6 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n6_fshl:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w8, w1, #1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsl w10, w0, w2
 ; CHECK-NEXT:    lsr w8, w8, w9
@@ -95,7 +94,6 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n7_fshr:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl w8, w0, #1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsr w10, w1, w2
 ; CHECK-NEXT:    lsl w8, w8, w9

diff  --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll
index a90603195cf348..4f41edb8348b2f 100644
--- a/llvm/test/CodeGen/AArch64/shift-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-mod.ll
@@ -40,7 +40,6 @@ define i64 @test3(i64 %x, i64 %y) {
 define i64 @test4(i64 %y, i32 %s) {
 ; CHECK-LABEL: test4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -52,7 +51,6 @@ entry:
 define i64 @test5(i64 %y, i32 %s) {
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -64,7 +62,6 @@ entry:
 define i64 @test6(i64 %y, i32 %s) {
 ; CHECK-LABEL: test6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -127,7 +124,7 @@ define i64 @ashr_add_shl_i36(i64 %r) {
 define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_mismatch_shifts1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296
+; CHECK-NEXT:    mov x8, #4294967296 // =0x100000000
 ; CHECK-NEXT:    add x8, x8, x0, lsl #8
 ; CHECK-NEXT:    asr x0, x8, #32
 ; CHECK-NEXT:    ret
@@ -140,7 +137,7 @@ define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) {
 define i64 @ashr_add_shl_mismatch_shifts2(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_mismatch_shifts2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296
+; CHECK-NEXT:    mov x8, #4294967296 // =0x100000000
 ; CHECK-NEXT:    add x8, x8, x0, lsr #8
 ; CHECK-NEXT:    lsr x0, x8, #8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 066928687cc02d..9b6bc09f571c37 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -21,7 +21,6 @@ define i1 @shl_i1(i1 %0, i1 %1){
 define i8 @shl_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: shl_i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -37,7 +36,6 @@ define i8 @shl_i8(i8 %0, i8 %1){
 define i16 @shl_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: shl_i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -123,7 +121,6 @@ define i8 @ashr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: ashr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxtb w8, w0
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -141,7 +138,6 @@ define i16 @ashr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: ashr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxth w8, w0
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -230,7 +226,6 @@ define i8 @lshr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: lshr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xff
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -248,7 +243,6 @@ define i16 @lshr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: lshr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xffff
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -535,7 +529,6 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ushl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -571,7 +564,6 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    ushl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -605,7 +597,6 @@ define <1 x i32> @shl_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsl w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <1 x i32> %0, %1
     ret <1 x i32> %3
@@ -715,7 +706,6 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    neg v1.8b, v1.8b
 ; CHECK-GI-NEXT:    sshl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -751,7 +741,6 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    neg v1.4h, v1.4h
 ; CHECK-GI-NEXT:    sshl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -782,7 +771,6 @@ define <1 x i32> @ashr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    asr w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <1 x i32> %0, %1
     ret <1 x i32> %3
@@ -889,7 +877,6 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    neg v1.8b, v1.8b
 ; CHECK-GI-NEXT:    ushl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -924,7 +911,6 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    neg v1.4h, v1.4h
 ; CHECK-GI-NEXT:    ushl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -955,7 +941,6 @@ define <1 x i32> @lshr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsr w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <1 x i32> %0, %1
     ret <1 x i32> %3

diff  --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
index 235fee718f539d..5e06b2d2aa87cb 100644
--- a/llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -34,19 +34,16 @@ define i32 @f1(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl x0, x0, x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f1:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    lsl x0, x0, x1
-; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f1:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
-; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -57,19 +54,16 @@ define i32 @f2(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    asr x0, x0, x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f2:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    asr x0, x0, x1
-; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f2:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    asr x0, x0, x1
-; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -80,19 +74,16 @@ define i32 @f3(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr x0, x0, x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f3:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    lsr x0, x0, x1
-; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f3:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    lsr x0, x0, x1
-; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32

diff  --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
index fb571eff39fe50..2c9dc42dc0767f 100644
--- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
@@ -21,12 +21,8 @@
 define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -47,12 +43,8 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i
 define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr d4, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -101,12 +93,8 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
 define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:    adrp x8, .LCPI2_1
@@ -214,10 +202,8 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
 ; CHECK-LABEL: shuffle4_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d5, d2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEXT:    fmov d4, d0
+; CHECK-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT:    mov v4.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v5.d[1], v3.d[0]
@@ -275,17 +261,15 @@ define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    adrp x8, .LCPI6_0
 ; CHECK-NEXT:    mov v2.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-NEXT:    adrp x8, .LCPI6_0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEXT:    adrp x8, .LCPI6_1
-; CHECK-NEXT:    tbl v3.8b, { v2.16b }, v1.8b
-; CHECK-NEXT:    tbl v2.8b, { v0.16b }, v1.8b
+; CHECK-NEXT:    tbl v2.8b, { v2.16b }, v1.8b
+; CHECK-NEXT:    tbl v1.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI6_1]
-; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
   %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
@@ -314,11 +298,9 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
 define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    adrp x8, .LCPI7_0
 ; CHECK-NEXT:    mov v2.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-NEXT:    adrp x8, .LCPI7_0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEXT:    adrp x8, .LCPI7_1
 ; CHECK-NEXT:    tbl v2.8b, { v2.16b }, v1.8b
@@ -354,10 +336,8 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
 ; CHECK-LABEL: shuffle4_v4i8_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d5, d2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-NEXT:    fmov d4, d0
+; CHECK-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    mov v4.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v5.d[1], v3.d[0]
@@ -393,12 +373,8 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
 define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) {
 ; CHECK-LABEL: shuffle4_v4i16_trunc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
   %a = trunc <4 x i16> %ae to <4 x i8>
@@ -432,13 +408,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %
 define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) {
 ; CHECK-LABEL: shuffle4_v4i32_trunc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v4.4h, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    xtn v1.4h, v1.4s
 ; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    xtn v5.4h, v1.4s
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT:    xtn v6.4h, v2.4s
-; CHECK-NEXT:    xtn v7.4h, v3.4s
-; CHECK-NEXT:    tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
+; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    xtn v3.4h, v3.4s
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
   %a = trunc <4 x i32> %ae to <4 x i8>
   %b = trunc <4 x i32> %be to <4 x i8>
@@ -470,11 +446,8 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %
 define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
 ; CHECK-LABEL: shuffle3_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -504,9 +477,8 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
 ; CHECK-LABEL: shuffle3_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d3, d2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    adrp x8, .LCPI12_0
 ; CHECK-NEXT:    fmov d2, d0
+; CHECK-NEXT:    adrp x8, .LCPI12_0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
@@ -562,13 +534,11 @@ define <4 x i32> @shuffle3_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v4.16b, v3.16b
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    adrp x9, .LCPI14_1
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    adrp x9, .LCPI14_1
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI14_1]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
@@ -632,16 +602,13 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8>
 define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v4.16b, v3.16b
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v3.16b, v1.16b
-; CHECK-NEXT:    ldr q5, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    adrp x8, .LCPI15_1
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
-; CHECK-NEXT:    tbl v31.16b, { v3.16b, v4.16b }, v5.16b
+; CHECK-NEXT:    tbl v31.16b, { v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    tbl v0.16b, { v31.16b, v0.16b }, v1.16b
 ; CHECK-NEXT:    ret
   %e1 = extractelement <8 x i8> %a, i32 4
@@ -705,6 +672,7 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-NEXT:    frintm v1.2d, v1.2d
 ; CHECK-NEXT:    frintm v5.2d, v5.2d
+; CHECK-NEXT:    ldr q16, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT:    frintm v2.2d, v2.2d
 ; CHECK-NEXT:    frintm v6.2d, v6.2d
 ; CHECK-NEXT:    frintm v3.2d, v3.2d
@@ -717,17 +685,16 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
 ; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
 ; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
 ; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    xtn v16.2s, v0.2d
-; CHECK-NEXT:    xtn v20.2s, v4.2d
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    xtn v17.2s, v1.2d
-; CHECK-NEXT:    xtn v21.2s, v5.2d
-; CHECK-NEXT:    xtn v18.2s, v2.2d
-; CHECK-NEXT:    xtn v22.2s, v6.2d
-; CHECK-NEXT:    xtn v19.2s, v3.2d
-; CHECK-NEXT:    xtn v23.2s, v7.2d
-; CHECK-NEXT:    tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
-; CHECK-NEXT:    tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    xtn v4.2s, v4.2d
+; CHECK-NEXT:    xtn v1.2s, v1.2d
+; CHECK-NEXT:    xtn v5.2s, v5.2d
+; CHECK-NEXT:    xtn v2.2s, v2.2d
+; CHECK-NEXT:    xtn v6.2s, v6.2d
+; CHECK-NEXT:    xtn v3.2s, v3.2d
+; CHECK-NEXT:    xtn v7.2s, v7.2d
+; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-NEXT:    tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
index 41dd7f06712d24..18f56d2fec7916 100644
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -316,8 +316,6 @@ define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
 define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
 ; CHECKLE-LABEL: test_shuf7:
 ; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECKLE-NEXT:    mov v0.h[2], v1.h[3]
 ; CHECKLE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
 ; CHECKLE-NEXT:    ret
@@ -338,10 +336,8 @@ define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
 define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
 ; CHECKLE-LABEL: test_shuf8:
 ; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECKLE-NEXT:    adrp x8, .LCPI12_0
 ; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
+; CHECKLE-NEXT:    adrp x8, .LCPI12_0
 ; CHECKLE-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
 ; CHECKLE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECKLE-NEXT:    ret
@@ -366,9 +362,7 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
 ; CHECKLE-LABEL: test_shuf9:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI13_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -378,10 +372,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI13_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI13_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -418,9 +412,7 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf11:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI15_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -430,10 +422,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI15_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI15_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -446,9 +438,7 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf12:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI16_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -458,10 +448,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI16_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI16_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -474,9 +464,7 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf13:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI17_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -486,10 +474,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI17_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI17_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -502,9 +490,7 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf14:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI18_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -514,10 +500,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI18_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI18_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -530,9 +516,7 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf15:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI19_0
-; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
-; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -542,10 +526,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI19_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI19_0
-; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
-; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 0f5b240e387ed0..d33e36c487e45b 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -7,46 +7,30 @@
 define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-SD-LABEL: shufflevector_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v8i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
     ret <8 x i8> %c
 }
 
 define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: shufflevector_v16i8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: shufflevector_v16i8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: shufflevector_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
     %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
     ret <16 x i8> %c
 }
@@ -61,23 +45,12 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
 }
 
 define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: shufflevector_v8i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    adrp x8, .LCPI3_0
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: shufflevector_v8i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: shufflevector_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
     %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
     ret <8 x i16> %c
 }
@@ -123,7 +96,6 @@ define <2 x ptr> @shufflevector_v2p0(<2 x ptr> %a, <2 x ptr> %b) {
 define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: shufflevector_v8i8_zeroes:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -142,7 +114,6 @@ define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
 define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shufflevector_v4i16_zeroes:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -161,7 +132,6 @@ define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
 define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: shufflevector_v2i32_zeroes:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
@@ -200,17 +170,12 @@ define <2 x ptr> @shufflevector_v2p0_zeroes(<2 x ptr> %a, <2 x ptr> %b) {
 define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-SD-LABEL: shufflevector_v2i1:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v2i1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, v1.s[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w9, v0.s[1]
 ; CHECK-GI-NEXT:    mov v1.b[1], w8
 ; CHECK-GI-NEXT:    mov v0.b[1], w9
@@ -220,7 +185,6 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x i1> %c
@@ -257,26 +221,25 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
 define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
 ; CHECK-SD-LABEL: shufflevector_v32i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
+; CHECK-SD-NEXT:    mov v1.16b, v2.16b
 ; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-SD-NEXT:    adrp x9, .LCPI18_1
-; CHECK-SD-NEXT:    mov v1.16b, v0.16b
-; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
-; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI18_1]
-; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI18_1]
+; CHECK-SD-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v32i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v3.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
 ; CHECK-GI-NEXT:    adrp x8, .LCPI18_1
 ; CHECK-GI-NEXT:    adrp x9, .LCPI18_0
-; CHECK-GI-NEXT:    mov v4.16b, v2.16b
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI18_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
-; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-GI-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
     ret <32 x i8> %c
@@ -314,26 +277,25 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
 define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
 ; CHECK-SD-LABEL: shufflevector_v16i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
+; CHECK-SD-NEXT:    mov v1.16b, v2.16b
 ; CHECK-SD-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-SD-NEXT:    adrp x9, .LCPI20_1
-; CHECK-SD-NEXT:    mov v1.16b, v0.16b
-; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
-; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI20_1]
-; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
+; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI20_1]
+; CHECK-SD-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-SD-NEXT:    mov v0.16b, v2.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v16i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v3.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
 ; CHECK-GI-NEXT:    adrp x8, .LCPI20_1
 ; CHECK-GI-NEXT:    adrp x9, .LCPI20_0
-; CHECK-GI-NEXT:    mov v4.16b, v2.16b
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI20_1]
-; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI20_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
-; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
+; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI20_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-GI-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
     ret <16 x i16> %c
@@ -360,10 +322,8 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-GI-LABEL: shufflevector_v8i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
-; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_0]
-; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    tbl v1.16b, { v2.16b, v3.16b }, v4.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
@@ -439,13 +399,11 @@ define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
 define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-SD-LABEL: shufflevector_v2i1_zeroes:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v2i1_zeroes:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    mov v0.b[1], w8
 ; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
@@ -453,7 +411,6 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
     ret <2 x i1> %c
@@ -464,7 +421,6 @@ define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -497,7 +453,6 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v1.2s, v0.s[0]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strh w9, [sp, #12]
@@ -599,23 +554,18 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
 define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-SD-LABEL: shufflevector_v7i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v7i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
     ret <7 x i8> %c
@@ -630,36 +580,22 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: shufflevector_v3i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI37_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
     ret <3 x i16> %c
 }
 
 define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
-; CHECK-SD-LABEL: shufflevector_v7i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    adrp x8, .LCPI38_0
-; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
-; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: shufflevector_v7i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: shufflevector_v7i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI38_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    ret
     %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
     ret <7 x i16> %c
 }
@@ -674,9 +610,7 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
 ; CHECK-GI-LABEL: shufflevector_v3i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
-; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI39_0]
-; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
@@ -709,7 +643,6 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
 define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-LABEL: shufflevector_v7i8_zeroes:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -719,7 +652,6 @@ define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
 define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
 ; CHECK-LABEL: shufflevector_v3i16_zeroes:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index f65a08ae2acea0..4d383fefc43c7b 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -298,7 +298,6 @@ exit:
 define i32 @f6(i1 %c, ptr %a, i32 %i) {
 ; CHECK-LABEL: f6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    tbz w0, #0, .LBB6_2
 ; CHECK-NEXT:  // %bb.1: // %if.then
 ; CHECK-NEXT:    mov w0, wzr

diff  --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
index aee705f0be9b9b..1341fa783d06b9 100644
--- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -133,7 +133,6 @@ define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target(
 ;
 ; CHECK-O3-LABEL: test_sel:
 ; CHECK-O3:       // %bb.0:
-; CHECK-O3-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-O3-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-O3-NEXT:    whilelo p2.b, xzr, x8
 ; CHECK-O3-NEXT:    sel p0.b, p2, p0.b, p1.b

diff  --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
index 8e3866fcec89a7..1fd616938822e8 100644
--- a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
+++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
@@ -28,7 +28,6 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
@@ -59,7 +58,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL renamable $q0, implicit killed $z0
   ; CHECK-REGALLOC-NEXT:   STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
@@ -95,7 +93,6 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp

diff  --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index 57f8e5438eaf2b..d40618f2678b69 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -299,7 +299,6 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
 define void @ldr_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: ldr_with_off_var:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16

diff  --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index 1ff32aade4a1f9..03c1f28fbaa184 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -299,7 +299,6 @@ define void @str_with_off_16mulvl(ptr %ptr) {
 define void @str_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: str_with_off_var:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16

diff  --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index 500c51159dd91f..f4ef07bcea7132 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -160,11 +160,9 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
@@ -201,11 +199,9 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
@@ -242,11 +238,9 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -288,11 +282,9 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -330,11 +322,9 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -372,11 +362,9 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -414,11 +402,9 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -456,11 +442,9 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
@@ -498,11 +482,9 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -540,11 +522,9 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -586,11 +566,9 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -627,11 +605,9 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -668,11 +644,9 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -709,11 +683,9 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -750,11 +722,9 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -791,11 +761,9 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -832,11 +800,9 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -873,11 +839,9 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -917,12 +881,10 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    and z1.b, z1.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; CHECK-NEXT:    str p0, [x8, #7, mul vl] // 2-byte Folded Spill
@@ -1089,9 +1051,8 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_f16
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1329,9 +1290,8 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v1f16
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1436,9 +1396,8 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v16i8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1470,9 +1429,8 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v8i16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1504,9 +1462,8 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v4i32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1538,9 +1495,8 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v2i64
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1572,9 +1528,8 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v8f16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1606,9 +1561,8 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v4f32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1640,9 +1594,8 @@ define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v2f64
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 572b1fff3520a9..05b62d2b6abcbf 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -112,10 +112,8 @@ define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_psta
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    index z0.d, #0, #1
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    add z0.d, z0.d, #41 // =0x29
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index 58992eb2d592a6..104b640f0f9804 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -139,12 +139,10 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    add x8, sp, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __arm_sme_state
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and x19, x0, #0x1
 ; CHECK-NEXT:    tbz w19, #0, .LBB4_2
@@ -162,9 +160,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
index 38d3bed2eaf907..2774394e46d6dd 100644
--- a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
@@ -6,8 +6,6 @@
 define <vscale x 16 x i8> @fcvt_x2(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) {
 ; CHECK-LABEL: fcvt_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvt z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
@@ -17,10 +15,6 @@ define <vscale x 16 x i8> @fcvt_x2(<vscale x 8 x half> %zn0, <vscale x 8 x half>
 define <vscale x 16 x i8> @fcvt_x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: fcvt_x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fcvt z0.b, { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
@@ -31,10 +25,6 @@ define <vscale x 16 x i8> @fcvt_x4(<vscale x 4 x float> %zn0, <vscale x 4 x floa
 define <vscale x 16 x i8> @fcvtn(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: fcvtn:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fcvtn z0.b, { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
@@ -45,8 +35,6 @@ define <vscale x 16 x i8> @fcvtn(<vscale x 4 x float> %zn0, <vscale x 4 x float>
 define <vscale x 16 x i8> @bfcvt(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) {
 ; CHECK-LABEL: bfcvt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvt z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
index dbf47d980d26eb..d436d647e6de0f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
@@ -6,9 +6,7 @@ target triple = "aarch64-linux"
 define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #0 {
 ; CHECK-LABEL: add_f16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -21,11 +19,7 @@ define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: add_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -41,9 +35,7 @@ define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #1 {
 ; CHECK-LABEL: sub_f16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -56,11 +48,7 @@ define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: sub_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -76,9 +64,7 @@ define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: add_bf16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -91,11 +77,7 @@ define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: add_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -111,9 +93,7 @@ define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: sub_bf16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -126,11 +106,7 @@ define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: sub_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
index eee577ce48349f..ea5f46481551d4 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
@@ -8,9 +8,7 @@
 define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -27,9 +25,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -50,11 +46,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -75,11 +67,7 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -105,11 +93,7 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -128,11 +112,7 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -155,15 +135,7 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -187,15 +159,7 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -223,9 +187,7 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -238,9 +200,7 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -253,9 +213,7 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    fadd za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -270,9 +228,7 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    fadd za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -289,11 +245,7 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -310,11 +262,7 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -331,11 +279,7 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    fadd za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -352,11 +296,7 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    fadd za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
index 9b68f25bb0649c..f539ac3d0904d9 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
@@ -7,8 +7,6 @@
 define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_cvtn_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
@@ -22,8 +20,6 @@ define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1,
 define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_bfcvtn_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
index ecfbb47ba55719..985613ccdd866b 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
@@ -53,20 +53,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }@multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1,  <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
   ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
@@ -75,20 +75,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1,  <vscale x 4 x float> %zm2,  <vscale x 4 x float> %zm3,  <vscale x 4 x float> %zm4)
   ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
@@ -97,20 +97,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famax.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
@@ -171,20 +171,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_main_multi_x2
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sme.famin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -195,20 +195,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sme.famin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -219,20 +219,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
index 006e8aa2475ba0..a03af63b86a73e 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
@@ -6,9 +6,7 @@
 define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -25,9 +23,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -44,11 +40,7 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -68,11 +60,7 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -94,9 +82,7 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -113,9 +99,7 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -132,11 +116,7 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -156,11 +136,7 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -182,11 +158,7 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -204,11 +176,7 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -244,15 +212,7 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -270,15 +230,7 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -320,11 +272,7 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -342,11 +290,7 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -364,15 +308,7 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -390,15 +326,7 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -418,9 +346,7 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -437,9 +363,7 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -458,8 +382,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z5.d, z0.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z4.s, z5.s }, z2.s[3]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice,
@@ -471,11 +395,7 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -495,11 +415,7 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret
@@ -521,8 +437,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    mov z27.d, z0.d
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z24.s - z27.s }, z4.s[3]
@@ -540,9 +456,7 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -559,9 +473,7 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -578,11 +490,7 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -602,11 +510,7 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
index ca149f41dbb835..46e2040715e8f1 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
@@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
 define void @fdot_multi_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7) #0 {
   call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
@@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7) #0 {
   call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
@@ -99,9 +99,7 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) #0 {
 ; CHECK-LABEL: fdot_single_za32_f16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -114,11 +112,7 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) #0 {
 ; CHECK-LABEL: fdot_single_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -134,9 +128,7 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) #0 {
 ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -149,11 +141,7 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused
 define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) #0 {
 ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -170,8 +158,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
 ; CHECK-LABEL: fdot_lane_za32_f16_vg1x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    ret
@@ -185,8 +173,8 @@ define void @fdot_lane_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
 ; CHECK-LABEL: fdot_lane_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
@@ -207,8 +195,8 @@ define void @bfdot_lane_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    ret
@@ -222,8 +210,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
index 591fe8da6b79cd..7ef319694917ef 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
@@ -6,8 +6,6 @@
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm)
@@ -17,8 +15,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_h
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm)
@@ -28,8 +24,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm)
@@ -41,10 +35,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm)
@@ -54,10 +44,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm)
@@ -67,10 +53,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm)
@@ -81,10 +63,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -94,10 +72,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <v
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2 ) {
 ; CHECK-LABEL: multi_vec_scale_x2_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
@@ -107,10 +81,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float(
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
@@ -121,14 +91,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_doub
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
@@ -138,14 +100,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1,  <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
@@ -155,14 +109,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
index c7a2ec16e3f7f6..3e6f8be057721f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
@@ -10,9 +10,7 @@
 define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.b[w12, 0:1], { z0.b, z1.b }
 ; CHECK-NEXT:    mov za0h.b[w12, 14:15], { z0.b, z1.b }
 ; CHECK-NEXT:    ret
@@ -25,9 +23,7 @@ define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -40,9 +36,7 @@ define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -55,9 +49,7 @@ define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
 define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -70,9 +62,7 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
 define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -85,9 +75,7 @@ define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -100,9 +88,7 @@ define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
 define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -112,9 +98,7 @@ define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -126,9 +110,7 @@ define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
 define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.b[w12, 0:1], { z0.b, z1.b }
 ; CHECK-NEXT:    mov za0v.b[w12, 14:15], { z0.b, z1.b }
 ; CHECK-NEXT:    ret
@@ -141,9 +123,7 @@ define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -156,9 +136,7 @@ define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -171,9 +149,7 @@ define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
 define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -186,9 +162,7 @@ define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
 define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -201,9 +175,7 @@ define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -216,9 +188,7 @@ define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
 define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -228,9 +198,7 @@ define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -246,11 +214,7 @@ define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
 define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.b[w12, 0:3], { z0.b - z3.b }
 ; CHECK-NEXT:    mov za0h.b[w12, 12:15], { z0.b - z3.b }
 ; CHECK-NEXT:    ret
@@ -263,11 +227,7 @@ define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -280,11 +240,7 @@ define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -297,11 +253,7 @@ define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
 define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -314,11 +266,7 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
 define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -328,11 +276,7 @@ define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -342,11 +286,7 @@ define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
 define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -356,11 +296,7 @@ define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -372,11 +308,7 @@ define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
 define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.b[w12, 0:3], { z0.b - z3.b }
 ; CHECK-NEXT:    mov za0v.b[w12, 12:15], { z0.b - z3.b }
 ; CHECK-NEXT:    ret
@@ -389,11 +321,7 @@ define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -406,11 +334,7 @@ define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -423,11 +347,7 @@ define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
 define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -440,11 +360,7 @@ define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
 define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -454,11 +370,7 @@ define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -468,11 +380,7 @@ define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
 define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -482,11 +390,7 @@ define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -500,9 +404,7 @@ define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
 define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -515,9 +417,7 @@ define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
 define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -530,9 +430,7 @@ define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
 define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -545,9 +443,7 @@ define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
 define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -560,9 +456,7 @@ define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
 define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -575,9 +469,7 @@ define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
 define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -590,9 +482,7 @@ define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
 define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -605,9 +495,7 @@ define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
 define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -624,11 +512,7 @@ define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale
 define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -641,11 +525,7 @@ define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
 define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -658,11 +538,7 @@ define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
 define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -675,11 +551,7 @@ define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
 define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -692,11 +564,7 @@ define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
 define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -709,11 +577,7 @@ define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
 define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -726,11 +590,7 @@ define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
 define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -743,11 +603,7 @@ define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
 define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
index 778f31194baf45..b1ca793bac96ed 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
@@ -6,8 +6,6 @@ target triple = "aarch64-linux"
 define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>}  @test_luti4_zt_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
 ; CHECK-LABEL: test_luti4_zt_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
 ; CHECK-NEXT:    ret
   %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
index 9d865b1e744716..cc27a8db7797d9 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
@@ -114,8 +114,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -316,10 +314,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -511,10 +505,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_max_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -573,20 +563,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -598,20 +588,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -623,20 +613,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -648,20 +638,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -675,20 +665,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -700,20 +690,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -725,20 +715,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -750,20 +740,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -777,14 +767,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_max_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -796,20 +778,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -821,20 +803,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -846,20 +828,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -873,8 +855,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -927,10 +907,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_maxnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -998,10 +974,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1060,14 +1032,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1079,20 +1043,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1103,20 +1067,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1127,20 +1091,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
index 575bcbc919b8cc..223dd355edb9cc 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
@@ -114,8 +114,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -316,10 +314,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -511,10 +505,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_min_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -573,20 +563,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -598,20 +588,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -623,20 +613,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -648,20 +638,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -675,20 +665,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -700,20 +690,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -725,20 +715,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -750,20 +740,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -778,14 +768,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_min_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -797,20 +779,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -822,20 +804,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -847,20 +829,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -874,8 +856,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -928,10 +908,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_minnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -999,10 +975,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1061,14 +1033,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1080,20 +1044,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1104,20 +1068,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1128,20 +1092,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
index d33609b6351274..70baa34d884b55 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -38,9 +38,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -53,9 +51,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -70,11 +66,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -87,11 +79,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -142,18 +130,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -164,18 +152,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -217,8 +205,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -232,8 +220,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -249,8 +237,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -266,8 +254,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -314,9 +302,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -329,9 +315,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -346,11 +330,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -363,11 +343,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -418,18 +394,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -440,18 +416,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -493,8 +469,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -508,8 +484,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -525,8 +501,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -542,8 +518,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -590,9 +566,7 @@ define void @multi_vector_mul_sub_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -605,9 +579,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -622,11 +594,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -639,11 +607,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -694,18 +658,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -716,18 +680,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -769,8 +733,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -784,8 +748,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -801,8 +765,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -818,8 +782,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -866,9 +830,7 @@ define void @multi_vector_mul_sub_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -881,9 +843,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -898,11 +858,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -915,11 +871,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -970,18 +922,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -992,18 +944,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -1045,8 +997,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1060,8 +1012,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -1077,8 +1029,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1094,8 +1046,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -1116,9 +1068,7 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1133,11 +1083,7 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscal
 define void @multi_vector_mul_add_single_signed_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1168,8 +1114,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1185,8 +1131,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x4_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1220,9 +1166,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x1_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1237,11 +1181,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1275,18 +1215,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, <vsca
 define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -1315,8 +1255,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1332,8 +1272,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x4_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
index e817dac3d1a6b1..20251aff99de14 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
@@ -120,9 +120,7 @@ define void @multi_vector_sub_single_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -135,9 +133,7 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -150,9 +146,7 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -165,9 +159,7 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -184,9 +176,7 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -199,9 +189,7 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -214,9 +202,7 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -229,9 +215,7 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -248,11 +232,7 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -269,11 +249,8 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -290,11 +267,7 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -311,11 +284,7 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -336,11 +305,7 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -357,11 +322,7 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -378,11 +339,7 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -399,11 +356,7 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -424,11 +377,7 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0,  <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -443,11 +392,7 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -462,11 +407,7 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -481,11 +422,7 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -504,11 +441,7 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0, <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -523,11 +456,7 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -542,11 +471,7 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -561,11 +486,7 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -584,15 +505,7 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -610,15 +523,7 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -636,15 +541,7 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -662,15 +559,7 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -692,15 +581,7 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -718,15 +599,7 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -744,15 +617,7 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -770,15 +635,7 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -912,9 +769,7 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %zn,
 define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -929,9 +784,7 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -946,9 +799,7 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -963,9 +814,7 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -984,9 +833,7 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1001,9 +848,7 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1018,9 +863,7 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1035,9 +878,7 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1056,11 +897,7 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1077,11 +914,7 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1098,11 +931,7 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1119,11 +948,7 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1144,11 +969,7 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1165,11 +986,7 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1186,11 +1003,7 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1207,11 +1020,7 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
index 13205d30258188..a85da1a0d799cb 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
@@ -324,20 +324,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -348,20 +348,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -372,20 +372,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -396,20 +396,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
@@ -484,20 +484,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -508,20 +508,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -532,20 +532,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -556,20 +556,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
index 5e94f11390a821..f0a140d3d52c38 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -8,7 +8,6 @@ define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
@@ -21,7 +20,6 @@ define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w0, w1
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %add = add i32 %idx1, %idx2

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
index ce9d0ca35cb629..dfc23eaf8a4cea 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
@@ -196,20 +196,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_mul
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                        <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -221,20 +221,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -246,20 +246,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -271,20 +271,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
index c2865990faab6f..7387f889d8b26e 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
@@ -8,9 +8,7 @@
 define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -27,9 +25,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -50,11 +46,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -75,11 +67,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -105,11 +93,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -128,11 +112,7 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -155,15 +135,7 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -187,15 +159,7 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -225,9 +189,7 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -240,9 +202,7 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -255,9 +215,7 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -272,9 +230,7 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -291,11 +247,7 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -314,11 +266,7 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -337,11 +285,7 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -360,11 +304,7 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
index 49106e12378bea..c83206f4d57891 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
@@ -19,7 +19,7 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, <vscale x 8 x half>
 
 ; == BFVDOT ==
 
-define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) {
+define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) #1 {
 ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -35,7 +35,7 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloa
 
 ; == SVDOT ==
 
-define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
+define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #1 {
 ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -48,7 +48,7 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
   ret void
 }
 
-define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
+define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
 ; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -61,7 +61,7 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
   ret void
 }
 
-define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
+define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
 ; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -151,7 +151,7 @@ entry:
 
 ; == UVDOT ==
 
-define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
+define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #1 {
 ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -164,7 +164,7 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
   ret void
 }
 
-define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
+define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
 ; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -177,7 +177,7 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
   ret void
 }
 
-define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
+define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
 ; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -267,7 +267,7 @@ entry:
 
 ; == SUVDOT ==
 
-define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
+define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
 ; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -332,7 +332,7 @@ entry:
 
 ; == USVDOT ==
 
-define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
+define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
 ; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0

diff  --git a/llvm/test/CodeGen/AArch64/smul_fix_sat.ll b/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
index c2d8d34b9305a3..d72798936b7c80 100644
--- a/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
@@ -146,8 +146,6 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: vec:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v1.s[1]
 ; CHECK-NEXT:    mov w10, v0.s[1]
 ; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
@@ -167,7 +165,6 @@ define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-NEXT:    csel w8, w8, w9, ne
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w11
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp = call <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
   ret <2 x i32> %tmp

diff  --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2bf1393c888b5b 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -113,8 +113,8 @@ body:             |
     liveins: $q0
     %0 = COPY $q0
     INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31
-    ; CHECK:      %1:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-    ; CHECK-NEXT: $s0 = COPY %1.ssub
+    ; CHECK:      %3:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+    ; CHECK-NEXT: $s0 = COPY %3.ssub
     $s0 = COPY %0.ssub
     RET_ReallyLR implicit $s0
 ...

diff  --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
index b8ab9a00c69810..3fffb25704fe3f 100644
--- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll
+++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ -21,7 +21,6 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %
 ; CHECK-LEGALIZATION-NEXT:    ptrue p0.d, vl2
 ; CHECK-LEGALIZATION-NEXT:    mov w9, #2 // =0x2
 ; CHECK-LEGALIZATION-NEXT:    sub x8, x8, #2
-; CHECK-LEGALIZATION-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-LEGALIZATION-NEXT:    mov x10, sp
 ; CHECK-LEGALIZATION-NEXT:    cmp x8, #2
 ; CHECK-LEGALIZATION-NEXT:    mov z0.d, p0/m, z1.d
@@ -65,7 +64,6 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    sub x8, x8, #2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
@@ -118,7 +116,6 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x
 ; CHECK-LEGALIZATION-NEXT:    ptrue p0.d, vl2
 ; CHECK-LEGALIZATION-NEXT:    mov w9, #2 // =0x2
 ; CHECK-LEGALIZATION-NEXT:    sub x8, x8, #2
-; CHECK-LEGALIZATION-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-LEGALIZATION-NEXT:    mov x10, sp
 ; CHECK-LEGALIZATION-NEXT:    cmp x8, #2
 ; CHECK-LEGALIZATION-NEXT:    mov z0.d, p0/m, z1.d
@@ -162,7 +159,6 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    sub x8, x8, #2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d

diff  --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b165ac0d56d20f..b009b6b990ebeb 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -302,7 +302,6 @@ define <2 x i64> @fold_srem_v2i64(<2 x i64> %x) {
 define <1 x i64> @fold_srem_v1i64(<1 x i64> %x) {
 ; CHECK-LABEL: fold_srem_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x8, #7378697629483820646 // =0x6666666666666666
 ; CHECK-NEXT:    movk x8, #26215

diff  --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll
index 86d74b69f4958f..707b95a0514241 100644
--- a/llvm/test/CodeGen/AArch64/store.ll
+++ b/llvm/test/CodeGen/AArch64/store.ll
@@ -109,7 +109,6 @@ define void @store_v2i64(<2 x i64> %a, ptr %ptr){
 define void @store_v2i8(<2 x i8> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v2i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strb w9, [x0]
@@ -118,7 +117,6 @@ define void @store_v2i8(<2 x i8> %a, ptr %ptr){
 ;
 ; CHECK-GI-LABEL: store_v2i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    str b0, [x0]
 ; CHECK-GI-NEXT:    str b1, [x0, #1]
@@ -149,7 +147,6 @@ define void @store_v32i8(<32 x i8> %a, ptr %ptr){
 define void @store_v2i16(<2 x i16> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v2i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strh w9, [x0]
@@ -158,7 +155,6 @@ define void @store_v2i16(<2 x i16> %a, ptr %ptr){
 ;
 ; CHECK-GI-LABEL: store_v2i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    str h0, [x0]
 ; CHECK-GI-NEXT:    str h1, [x0, #2]
@@ -177,16 +173,10 @@ define void @store_v16i16(<16 x i16> %a, ptr %ptr){
 }
 
 define void @store_v1i32(<1 x i32> %a, ptr %ptr){
-; CHECK-SD-LABEL: store_v1i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    str s0, [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: store_v1i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    str s0, [x0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: store_v1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
     store <1 x i32> %a, ptr %ptr
     ret void
 }
@@ -242,7 +232,6 @@ define void @store_v7i8(<7 x i8> %a, ptr %ptr){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add x8, x0, #6
 ; CHECK-SD-NEXT:    add x9, x0, #4
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str s0, [x0]
 ; CHECK-SD-NEXT:    st1 { v0.b }[6], [x8]
 ; CHECK-SD-NEXT:    st1 { v0.h }[2], [x9]
@@ -251,7 +240,6 @@ define void @store_v7i8(<7 x i8> %a, ptr %ptr){
 ; CHECK-GI-LABEL: store_v7i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, #1
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x9, x0, #2
 ; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
@@ -273,7 +261,6 @@ define void @store_v3i16(<3 x i16> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v3i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add x8, x0, #4
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str s0, [x0]
 ; CHECK-SD-NEXT:    st1 { v0.h }[2], [x8]
 ; CHECK-SD-NEXT:    ret
@@ -282,7 +269,6 @@ define void @store_v3i16(<3 x i16> %a, ptr %ptr){
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, #2
 ; CHECK-GI-NEXT:    add x9, x0, #4
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str h0, [x0]
 ; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    st1 { v0.h }[2], [x9]

diff  --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll
index 8f35a69f52b85b..44be83b1578234 100644
--- a/llvm/test/CodeGen/AArch64/sub.ll
+++ b/llvm/test/CodeGen/AArch64/sub.ll
@@ -399,19 +399,14 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-GI-NEXT:    fmov x8, d2
-; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    sub x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = sub <3 x i64> %d, %e

diff  --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
index d04dc500287653..dd9e462b859054 100644
--- a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
@@ -8,9 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
@@ -22,9 +20,7 @@ define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
@@ -36,9 +32,7 @@ define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
@@ -50,9 +44,7 @@ define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
@@ -64,9 +56,7 @@ define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
@@ -97,9 +87,7 @@ define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1
 define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -110,9 +98,7 @@ define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -123,9 +109,7 @@ define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -136,9 +120,7 @@ define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -167,9 +149,7 @@ define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i
 define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
@@ -181,9 +161,7 @@ define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
@@ -195,9 +173,7 @@ define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
@@ -209,9 +185,7 @@ define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
@@ -223,9 +197,7 @@ define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
@@ -256,9 +228,7 @@ define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1
 define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -269,9 +239,7 @@ define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -282,9 +250,7 @@ define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -295,9 +261,7 @@ define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
   %2 = trunc i64 %1 to i32

diff  --git a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
index 7bc31d44bb6547..9100624fb62411 100644
--- a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
+++ b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
@@ -8,7 +8,6 @@ define float @add_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fadd z0.s, z0.s, z2.s
 ; CHECK-NEXT:    faddv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fadd.f32.nxv8f32(float -0.0, <vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float -0.0, <vscale x 4 x float> %b)
@@ -30,7 +29,6 @@ define float @fmin_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %b)
@@ -45,7 +43,6 @@ define float @fmax_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %b)
@@ -92,7 +89,6 @@ define i32 @add_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-NEXT:    add z0.s, z0.s, z2.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.add.i32.nxv8i32(<vscale x 8 x i32> %a)
   %r2 = call i32 @llvm.vector.reduce.add.i32.nxv4i32(<vscale x 4 x i32> %b)
@@ -113,7 +109,6 @@ define i16 @add_ext_i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ae = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
   %be = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
@@ -140,7 +135,6 @@ define i16 @add_ext_v32i16(<vscale x 32 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ae = zext <vscale x 32 x i8> %a to <vscale x 32 x i16>
   %be = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 6d4f5963881e58..399490e270f901 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -118,7 +118,6 @@ define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
 define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_8xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 0
   ret half %b
@@ -128,7 +127,6 @@ define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane7_8xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 7
   ret half %b
@@ -138,7 +136,6 @@ define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane8_8xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[8]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 8
   ret half %b
@@ -147,7 +144,6 @@ define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
 define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 0
   ret half %b
@@ -157,7 +153,6 @@ define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 3
   ret half %b
@@ -167,7 +162,6 @@ define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 4
   ret half %b
@@ -176,7 +170,6 @@ define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
 define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 0
   ret half %b
@@ -186,7 +179,6 @@ define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 1
   ret half %b
@@ -196,7 +188,6 @@ define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 2
   ret half %b
@@ -205,7 +196,6 @@ define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
 define bfloat @test_lane0_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_8xbf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 0
   ret bfloat %b
@@ -215,7 +205,6 @@ define bfloat @test_lane7_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane7_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 7
   ret bfloat %b
@@ -225,7 +214,6 @@ define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane8_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[8]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 8
   ret bfloat %b
@@ -234,7 +222,6 @@ define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 define bfloat @test_lane0_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xbf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 0
   ret bfloat %b
@@ -244,7 +231,6 @@ define bfloat @test_lane3_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 3
   ret bfloat %b
@@ -254,7 +240,6 @@ define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 4
   ret bfloat %b
@@ -263,7 +248,6 @@ define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 define bfloat @test_lane0_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xbf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 0
   ret bfloat %b
@@ -273,7 +257,6 @@ define bfloat @test_lane1_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 1
   ret bfloat %b
@@ -283,7 +266,6 @@ define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 2
   ret bfloat %b
@@ -292,7 +274,6 @@ define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xf32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 0
   ret float %b
@@ -302,7 +283,6 @@ define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 3
   ret float %b
@@ -312,7 +292,6 @@ define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 4
   ret float %b
@@ -321,7 +300,6 @@ define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
 define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 0
   ret float %b
@@ -331,7 +309,6 @@ define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 1
   ret float %b
@@ -341,7 +318,6 @@ define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 2
   ret float %b
@@ -350,7 +326,6 @@ define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
 define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 0
   ret double %b
@@ -360,7 +335,6 @@ define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 1
   ret double %b
@@ -370,7 +344,6 @@ define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 2
   ret double %b

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 5b7522856e2daf..6bce1050bbc811 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -85,7 +85,6 @@ define <2 x i16> @extract_v2i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #4
 ; CHECK-NEXT:    add x8, x8, #34
 ; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -144,7 +143,6 @@ define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
 ; CHECK-LABEL: extract_v2f32_nxv16f32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float> %arg, i64 2)
   ret <2 x float> %ext
@@ -161,7 +159,6 @@ define <4 x i1> @extract_v4i1_nxv32i1_0(<vscale x 32 x i1> %arg) {
 ; CHECK-NEXT:    umov w8, v1.b[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 0)
   ret <4 x i1> %ext
@@ -199,7 +196,6 @@ define <4 x i1> @extract_v4i1_nxv32i1_16(<vscale x 32 x i1> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #6
 ; CHECK-NEXT:    add x8, x8, #19
 ; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -218,7 +214,6 @@ define <4 x i1> @extract_v4i1_v32i1_16(<32 x i1> %arg) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    ldr w8, [sp, #88]
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ext = call <4 x i1> @llvm.vector.extract.v4i1.v32i1(<32 x i1> %arg, i64 16)
   ret <4 x i1> %ext
@@ -254,7 +249,6 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #6
 ; CHECK-NEXT:    add x8, x8, #19
 ; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -268,7 +262,6 @@ define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %arg, i64 2)
   ret <2 x i32> %ext
@@ -284,7 +277,6 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ldr q1, [sp, #16]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -298,7 +290,6 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
   ret <4 x half> %ext
@@ -310,7 +301,6 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
   ret <4 x half> %ext
@@ -322,7 +312,6 @@ define <2 x half> @extract_v2f16_nxv4f16_2(<vscale x 4 x half> %arg) {
 ; CHECK-NEXT:    mov z1.s, z0.s[3]
 ; CHECK-NEXT:    mov z0.s, z0.s[2]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 2)
   ret <2 x half> %ext
@@ -334,7 +323,6 @@ define <2 x half> @extract_v2f16_nxv4f16_6(<vscale x 4 x half> %arg) {
 ; CHECK-NEXT:    mov z1.s, z0.s[7]
 ; CHECK-NEXT:    mov z0.s, z0.s[6]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 6)
   ret <2 x half> %ext

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 518e3573b5edd3..06ca0b4a4b7601 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -5,7 +5,6 @@
 define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK-LABEL: extract_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
   ret <2 x i64> %retval
@@ -16,7 +15,6 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -26,7 +24,6 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
 define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK-LABEL: extract_v4i32_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
   ret <4 x i32> %retval
@@ -37,7 +34,6 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -48,7 +44,6 @@ define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v4i32_nxv2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0)
   ret <4 x i32> %retval
@@ -60,7 +55,6 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -70,7 +64,6 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
 define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -81,7 +74,6 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -92,7 +84,6 @@ define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v8i16_nxv4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -104,7 +95,6 @@ define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -116,7 +106,6 @@ define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -129,7 +118,6 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -139,7 +127,6 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
 define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 ; CHECK-LABEL: extract_v16i8_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -150,7 +137,6 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind
 ; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -161,7 +147,6 @@ define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v16i8_nxv8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -173,7 +158,6 @@ define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -185,7 +169,6 @@ define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -198,7 +181,6 @@ define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -211,7 +193,6 @@ define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 {
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -225,7 +206,6 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -242,7 +222,6 @@ define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
 ; CHECK-NEXT:    mov x8, v0.d[1]
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
   ret <2 x i1> %mask
@@ -259,7 +238,6 @@ define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <4 x i1> @llvm.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
   ret <4 x i1> %mask
@@ -284,7 +262,6 @@ define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
 ; CHECK-NEXT:    umov w8, v1.h[7]
 ; CHECK-NEXT:    mov v0.b[6], w9
 ; CHECK-NEXT:    mov v0.b[7], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
   ret <8 x i1> %mask
@@ -324,7 +301,6 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
 ; CHECK-LABEL: extract_fixed_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -348,7 +324,6 @@ define <4 x i32> @typesize_regression_test_v4i32(ptr %addr, i64 %idx) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %ptr = getelementptr inbounds i32, ptr %addr, i64 %idx
@@ -365,7 +340,6 @@ entry:
 define <2 x float> @extract_v2f32_nxv4f32_splat(float %f) {
 ; CHECK-LABEL: extract_v2f32_nxv4f32_splat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index cbede1bf8bb74b..99e833fc17271a 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -1025,7 +1025,6 @@ declare <vscale x 4 x bfloat> @llvm.vector.extract.nxv4bf16.nxv16bf16(<vscale x
 define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat(float %f) {
 ; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
index e7681cae083720..e2227f5855938c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
@@ -6,9 +6,7 @@
 define float @pred_fadda_nxv2f32(float %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x float> %y, <vscale x 2 x float> splat(float -0.000000e+00)
   %fadda = call float @llvm.vector.reduce.fadd.nxv2f32(float %x, <vscale x 2 x float> %sel)
@@ -18,9 +16,7 @@ define float @pred_fadda_nxv2f32(float %x, <vscale x 2 x float> %y, <vscale x 2
 define float @pred_fadda_nxv4f32(float %x, <vscale x 4 x float> %y, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %y, <vscale x 4 x float> splat(float -0.000000e+00)
   %fadda = call float @llvm.vector.reduce.fadd.nxv4f32(float %x, <vscale x 4 x float> %sel)
@@ -30,9 +26,7 @@ define float @pred_fadda_nxv4f32(float %x, <vscale x 4 x float> %y, <vscale x 4
 define double @pred_fadda_nxv2f64(double %x, <vscale x 2 x double> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %y, <vscale x 2 x double> splat(double -0.000000e+00)
   %fadda = call double @llvm.vector.reduce.fadd.nxv2f64(double %x, <vscale x 2 x double> %sel)
@@ -42,9 +36,7 @@ define double @pred_fadda_nxv2f64(double %x, <vscale x 2 x double> %y, <vscale x
 define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x half> %y, <vscale x 2 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv2f16(half %x, <vscale x 2 x half> %sel)
@@ -54,9 +46,7 @@ define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i
 define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x half> %y, <vscale x 4 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv4f16(half %x, <vscale x 4 x half> %sel)
@@ -66,9 +56,7 @@ define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i
 define half @pred_fadda_nxv8f16(half %x, <vscale x 8 x half> %y, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %y, <vscale x 8 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv8f16(half %x, <vscale x 8 x half> %sel)

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
index 1e71c4b66156cb..cf42145ea542ec 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
@@ -277,9 +277,7 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -289,9 +287,7 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
   ret <2 x i64> %res
@@ -741,7 +737,6 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz v0.8b, v0.8b
 ; CHECK-NEXT:    ret
@@ -753,7 +748,6 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz v0.16b, v0.16b
 ; CHECK-NEXT:    ret
@@ -839,7 +833,6 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz v0.4h, v0.4h
 ; CHECK-NEXT:    ret
@@ -851,7 +844,6 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz v0.8h, v0.8h
 ; CHECK-NEXT:    ret
@@ -938,7 +930,6 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz v0.2s, v0.2s
 ; CHECK-NEXT:    ret
@@ -951,7 +942,6 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz v0.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -1037,10 +1027,8 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -1050,10 +1038,8 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
index f7751131005e30..92b332f7611fb8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
@@ -23,8 +23,6 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) vscale_range(2,0) #0
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -184,8 +182,6 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) vscale_range(2,0)
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -314,8 +310,6 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) vscale_range(2,0)
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -419,8 +413,6 @@ define void @concat_v64i32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> <i32 0, i32 1>
@@ -527,8 +519,6 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) vscale_range(2
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -657,8 +647,6 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) vscale_rang
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -762,8 +750,6 @@ define void @concat_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> <i32 0, i32 1>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index 0f8f4a6843eae8..dddfccaf8acce3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -22,7 +22,6 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 8)
   ret <8 x i8> %ret
@@ -103,7 +102,6 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2)
   ret <2 x i16> %ret
@@ -114,7 +112,6 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %op, i64 4)
   ret <4 x i16> %ret
@@ -193,7 +190,6 @@ define void @extract_subvector_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1)
@@ -205,7 +201,6 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %op, i64 2)
   ret <2 x i32> %ret
@@ -285,7 +280,6 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <1 x i64> @llvm.vector.extract.v1i64.v2i64(<2 x i64> %op, i64 1)
   ret <1 x i64> %ret
@@ -357,7 +351,6 @@ define void @extract_subvector_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
 define <2 x half> @extract_subvector_v4f16(<4 x half> %op) vscale_range(16,0) #0 {
 ; CHECK-LABEL: extract_subvector_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %op, i64 2)
@@ -369,7 +362,6 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) vscale_range(2,0) #0
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <4 x half> @llvm.vector.extract.v4f16.v8f16(<8 x half> %op, i64 4)
   ret <4 x half> %ret
@@ -448,7 +440,6 @@ define void @extract_subvector_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @extract_subvector_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1)
@@ -460,7 +451,6 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) vscale_range(2,0) #
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %op, i64 2)
   ret <2 x float> %ret
@@ -540,7 +530,6 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) vscale_range(2,0)
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <1 x double> @llvm.vector.extract.v1f64.v2f64(<2 x double> %op, i64 1)
   ret <1 x double> %ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
index ad4efeaf39247a..079ff17d5f0e4c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
@@ -13,7 +13,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @extractelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extractelement_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    ret
     %r = extractelement <4 x half> %op1, i64 3
@@ -36,7 +35,6 @@ define half @extractelement_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.h, z0.h[15]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <16 x half>, ptr %a
     %r = extractelement <16 x half> %op1, i64 15
@@ -50,7 +48,6 @@ define half @extractelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    mov z0.h, z0.h[15]
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v32f16:
@@ -58,7 +55,6 @@ define half @extractelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.h, z0.h[31]
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <32 x half>, ptr %a
     %r = extractelement <32 x half> %op1, i64 31
@@ -97,7 +93,6 @@ define half @extractelement_v128f16(ptr %a) vscale_range(16,0) #0 {
 define float @extractelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extractelement_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    ret
     %r = extractelement <2 x float> %op1, i64 1
@@ -120,7 +115,6 @@ define float @extractelement_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.s, z0.s[7]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <8 x float>, ptr %a
     %r = extractelement <8 x float> %op1, i64 7
@@ -134,7 +128,6 @@ define float @extractelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    mov z0.s, z0.s[7]
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v16f32:
@@ -142,7 +135,6 @@ define float @extractelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.s, z0.s[15]
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <16 x float>, ptr %a
     %r = extractelement <16 x float> %op1, i64 15
@@ -202,7 +194,6 @@ define double @extractelement_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.d, z0.d[3]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <4 x double>, ptr %a
     %r = extractelement <4 x double> %op1, i64 3
@@ -216,7 +207,6 @@ define double @extractelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    mov z0.d, z0.d[3]
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v8f64:
@@ -224,7 +214,6 @@ define double @extractelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.d, z0.d[7]
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <8 x double>, ptr %a
     %r = extractelement <8 x double> %op1, i64 7

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
index b60988be1e76c7..ccf47cf75d2c8c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
@@ -524,7 +524,6 @@ define void @fcvt_v32f64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvt_v1f64_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
index 8053a401e5f45e..0b11789b7d549a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
@@ -14,10 +14,7 @@ define half @fadda_v4f16(half %start, <4 x half> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
   ret half %res
@@ -28,10 +25,7 @@ define half @fadda_v8f16(half %start, <8 x half> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
   ret half %res
@@ -41,10 +35,8 @@ define half @fadda_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v16f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl16
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
@@ -55,22 +47,18 @@ define half @fadda_v32f16(half %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v32f16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v32f16:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda h0, p0, h0, z1.h
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
@@ -81,10 +69,8 @@ define half @fadda_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v64f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl64
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
@@ -95,10 +81,8 @@ define half @fadda_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v128f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl128
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
@@ -110,10 +94,7 @@ define float @fadda_v2f32(float %start, <2 x float> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
   ret float %res
@@ -124,10 +105,7 @@ define float @fadda_v4f32(float %start, <4 x float> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
   ret float %res
@@ -137,10 +115,8 @@ define float @fadda_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v8f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl8
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
@@ -151,22 +127,18 @@ define float @fadda_v16f32(float %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v16f32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v16f32:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda s0, p0, s0, z1.s
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
@@ -177,10 +149,8 @@ define float @fadda_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v32f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl32
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
@@ -191,10 +161,8 @@ define float @fadda_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v64f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl64
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
@@ -216,10 +184,7 @@ define double @fadda_v2f64(double %start, <2 x double> %a) vscale_range(1,0) #0
 ; CHECK-LABEL: fadda_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
   ret double %res
@@ -229,10 +194,8 @@ define double @fadda_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
@@ -243,22 +206,18 @@ define double @fadda_v8f64(double %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v8f64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v8f64:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda d0, p0, d0, z1.d
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
@@ -269,10 +228,8 @@ define double @fadda_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
@@ -283,10 +240,8 @@ define double @fadda_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v32f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl32
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
@@ -302,7 +257,6 @@ define half @faddv_v4f16(half %start, <4 x half> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -315,7 +269,6 @@ define half @faddv_v8f16(half %start, <8 x half> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -402,7 +355,6 @@ define float @faddv_v4f32(float %start, <4 x float> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -587,7 +539,6 @@ define half @fmaxv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
@@ -603,7 +554,6 @@ define half @fmaxv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fmaxnmv h0, p0, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v32f16:
@@ -611,7 +561,6 @@ define half @fmaxv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv h0, p0, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
@@ -624,7 +573,6 @@ define half @fmaxv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
@@ -637,7 +585,6 @@ define half @fmaxv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
@@ -670,7 +617,6 @@ define float @fmaxv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
@@ -686,7 +632,6 @@ define float @fmaxv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fmaxnmv s0, p0, z0.s
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v16f32:
@@ -694,7 +639,6 @@ define float @fmaxv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv s0, p0, z0.s
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
@@ -707,7 +651,6 @@ define float @fmaxv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
@@ -720,7 +663,6 @@ define float @fmaxv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
@@ -752,7 +694,6 @@ define double @fmaxv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
@@ -768,7 +709,6 @@ define double @fmaxv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fmaxnmv d0, p0, z0.d
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v8f64:
@@ -776,7 +716,6 @@ define double @fmaxv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv d0, p0, z0.d
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
@@ -789,7 +728,6 @@ define double @fmaxv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
@@ -802,7 +740,6 @@ define double @fmaxv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op)
@@ -839,7 +776,6 @@ define half @fminv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
@@ -855,7 +791,6 @@ define half @fminv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fminnmv h0, p0, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v32f16:
@@ -863,7 +798,6 @@ define half @fminv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv h0, p0, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op)
@@ -876,7 +810,6 @@ define half @fminv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op)
@@ -889,7 +822,6 @@ define half @fminv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op)
@@ -922,7 +854,6 @@ define float @fminv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
@@ -938,7 +869,6 @@ define float @fminv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fminnmv s0, p0, z0.s
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v16f32:
@@ -946,7 +876,6 @@ define float @fminv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv s0, p0, z0.s
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
@@ -959,7 +888,6 @@ define float @fminv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op)
@@ -972,7 +900,6 @@ define float @fminv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %op)
@@ -1004,7 +931,6 @@ define double @fminv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
@@ -1020,7 +946,6 @@ define double @fminv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fminnmv d0, p0, z0.d
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v8f64:
@@ -1028,7 +953,6 @@ define double @fminv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv d0, p0, z0.d
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
@@ -1041,7 +965,6 @@ define double @fminv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op)
@@ -1054,7 +977,6 @@ define double @fminv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op)
@@ -1089,7 +1011,6 @@ define half @fmaximumv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
@@ -1105,7 +1026,6 @@ define half @fmaximumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fmaxv h0, p0, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v32f16:
@@ -1113,7 +1033,6 @@ define half @fmaximumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv h0, p0, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v32f16(<32 x half> %op)
@@ -1126,7 +1045,6 @@ define half @fmaximumv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v64f16(<64 x half> %op)
@@ -1139,7 +1057,6 @@ define half @fmaximumv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v128f16(<128 x half> %op)
@@ -1172,7 +1089,6 @@ define float @fmaximumv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
@@ -1188,7 +1104,6 @@ define float @fmaximumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fmaxv s0, p0, z0.s
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v16f32:
@@ -1196,7 +1111,6 @@ define float @fmaximumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv s0, p0, z0.s
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %op)
@@ -1209,7 +1123,6 @@ define float @fmaximumv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %op)
@@ -1222,7 +1135,6 @@ define float @fmaximumv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %op)
@@ -1254,7 +1166,6 @@ define double @fmaximumv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
@@ -1270,7 +1181,6 @@ define double @fmaximumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fmaxv d0, p0, z0.d
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v8f64:
@@ -1278,7 +1188,6 @@ define double @fmaximumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv d0, p0, z0.d
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %op)
@@ -1291,7 +1200,6 @@ define double @fmaximumv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %op)
@@ -1304,7 +1212,6 @@ define double @fmaximumv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %op)
@@ -1339,7 +1246,6 @@ define half @fminimumv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
@@ -1355,7 +1261,6 @@ define half @fminimumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fminv h0, p0, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v32f16:
@@ -1363,7 +1268,6 @@ define half @fminimumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv h0, p0, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v32f16(<32 x half> %op)
@@ -1376,7 +1280,6 @@ define half @fminimumv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v64f16(<64 x half> %op)
@@ -1389,7 +1292,6 @@ define half @fminimumv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v128f16(<128 x half> %op)
@@ -1422,7 +1324,6 @@ define float @fminimumv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
@@ -1438,7 +1339,6 @@ define float @fminimumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fminv s0, p0, z0.s
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v16f32:
@@ -1446,7 +1346,6 @@ define float @fminimumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv s0, p0, z0.s
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %op)
@@ -1459,7 +1358,6 @@ define float @fminimumv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %op)
@@ -1472,7 +1370,6 @@ define float @fminimumv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %op)
@@ -1504,7 +1401,6 @@ define double @fminimumv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
@@ -1520,7 +1416,6 @@ define double @fminimumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fminv d0, p0, z0.d
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v8f64:
@@ -1528,7 +1423,6 @@ define double @fminimumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv d0, p0, z0.d
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %op)
@@ -1541,7 +1435,6 @@ define double @fminimumv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %op)
@@ -1554,7 +1447,6 @@ define double @fminimumv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %op)

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index d1e9dc13f50e87..383c697d74ea00 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -282,7 +282,6 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
 define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -303,7 +302,6 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 ; VBITS_GE_256-LABEL: select_v8f64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_256-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d
 ; VBITS_GE_256-NEXT:    mov z0.d, x8
@@ -322,7 +320,6 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 ;
 ; VBITS_GE_512-LABEL: select_v8f64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_512-NEXT:    ptrue p0.d
 ; VBITS_GE_512-NEXT:    mov z0.d, x8
@@ -343,7 +340,6 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 ; CHECK-LABEL: select_v16f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -364,7 +360,6 @@ define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
 ; CHECK-LABEL: select_v32f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
index af54b146c5b663..baab397f093573 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -111,7 +111,6 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <2 x half> %op1 to <2 x i32>
   ret <2 x i32> %res
@@ -219,12 +218,10 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
 define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptoui <2 x half> %op1 to <2 x i64>
   ret <2 x i64> %res
@@ -332,7 +329,6 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <4 x float> %op1 to <4 x i16>
   ret <4 x i16> %res
@@ -345,7 +341,6 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x float>, ptr %a
   %res = fptoui <8 x float> %op1 to <8 x i16>
@@ -512,7 +507,6 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <1 x float> %op1 to <1 x i64>
   ret <1 x i64> %res
@@ -611,11 +605,9 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptoui <1 x double> %op1 to <1 x i16>
   ret <1 x i16> %res
@@ -640,7 +632,6 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptoui <4 x double> %op1 to <4 x i16>
@@ -661,7 +652,6 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
@@ -671,7 +661,6 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x double>, ptr %a
   %res = fptoui <8 x double> %op1 to <8 x i16>
@@ -714,7 +703,6 @@ define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -740,7 +728,6 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptoui <4 x double> %op1 to <4 x i32>
@@ -1004,7 +991,6 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <2 x half> %op1 to <2 x i32>
   ret <2 x i32> %res
@@ -1112,12 +1098,10 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
 define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptosi <2 x half> %op1 to <2 x i64>
   ret <2 x i64> %res
@@ -1225,7 +1209,6 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <4 x float> %op1 to <4 x i16>
   ret <4 x i16> %res
@@ -1238,7 +1221,6 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x float>, ptr %a
   %res = fptosi <8 x float> %op1 to <8 x i16>
@@ -1405,7 +1387,6 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <1 x float> %op1 to <1 x i64>
   ret <1 x i64> %res
@@ -1504,11 +1485,9 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptosi <1 x double> %op1 to <1 x i16>
   ret <1 x i16> %res
@@ -1533,7 +1512,6 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptosi <4 x double> %op1 to <4 x i16>
@@ -1554,7 +1532,6 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
@@ -1564,7 +1541,6 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x double>, ptr %a
   %res = fptosi <8 x double> %op1 to <8 x i16>
@@ -1607,7 +1583,6 @@ define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -1633,7 +1608,6 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptosi <4 x double> %op1 to <4 x i32>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 61e04682fa0bfb..b34aa0556c23ae 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -18,7 +18,6 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -29,7 +28,6 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -70,23 +68,19 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
 ; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __trunctfdf2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
@@ -95,23 +89,19 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
index 6f4d257039bca1..d1ddad591e7e43 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -14,9 +14,7 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov h1, #5.00000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = insertelement <4 x half> %op1, half 5.0, i64 3
   ret <4 x half> %r
@@ -135,9 +133,7 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, #5.00000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = insertelement <2 x float> %op1, float 5.0, i64 1
   ret <2 x float> %r

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
index 58fca3a2cf8b6e..7758955bca48d9 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -711,10 +711,7 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -724,10 +721,7 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
index 0ddf434eff930a..a5d2a09048e354 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
@@ -30,11 +30,9 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
@@ -55,16 +53,13 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_256-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_256-NEXT:    mov v0.b[7], w8
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v8i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
@@ -85,7 +80,6 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_512-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_512-NEXT:    mov v0.b[7], w8
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -118,11 +112,9 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v16i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ext z1.b, z1.b, z1.b, #16
@@ -136,22 +128,18 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v16i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -304,7 +292,6 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_256-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_256-NEXT:    mov v0.h[3], w8
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v4i16:
@@ -320,7 +307,6 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_512-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_512-NEXT:    mov v0.h[3], w8
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <4 x i16> %op1, %op2
   ret <4 x i16> %res
@@ -341,26 +327,20 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v8i16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -500,10 +480,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -514,10 +491,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -622,10 +596,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -636,10 +607,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -763,11 +731,9 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v8i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
@@ -788,16 +754,13 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_256-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_256-NEXT:    mov v0.b[7], w8
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v8i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
@@ -818,7 +781,6 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_512-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_512-NEXT:    mov v0.b[7], w8
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -851,11 +813,9 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v16i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ext z1.b, z1.b, z1.b, #16
@@ -869,22 +829,18 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v16i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -1024,7 +980,6 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_256-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_256-NEXT:    mov v0.h[3], w8
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v4i16:
@@ -1040,7 +995,6 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_512-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_512-NEXT:    mov v0.h[3], w8
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <4 x i16> %op1, %op2
   ret <4 x i16> %res
@@ -1061,26 +1015,20 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v8i16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v8i16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -1211,10 +1159,7 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -1225,10 +1170,7 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -1333,10 +1275,7 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -1347,10 +1286,7 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
index 4feb86305f8f62..4ca25ba9bd0206 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
@@ -15,9 +15,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i1_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
@@ -38,9 +37,8 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i3_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    lsl z0.d, z0.d, #61
 ; CHECK-NEXT:    asr z0.d, z0.d, #61
@@ -58,9 +56,8 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <16 x i8> %a to <16 x i16>
@@ -141,9 +138,8 @@ define void @sext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -155,11 +151,10 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: sext_v16i8_v16i32:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
-; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
@@ -169,9 +164,8 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v16i8_v16i32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -226,9 +220,8 @@ define void @sext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    sxtb z0.d, p0/m, z0.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -255,9 +248,8 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v8i8_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -270,9 +262,8 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -308,9 +299,8 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <8 x i16> %a to <8 x i32>
@@ -390,9 +380,8 @@ define void @sext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -404,11 +393,10 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: sext_v8i16_v8i64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
-; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
@@ -418,9 +406,8 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v8i16_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -472,9 +459,8 @@ define void @sext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <4 x i32> %a to <4 x i64>
@@ -554,9 +540,8 @@ define void @sext_v32i32_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <16 x i8> %a to <16 x i16>
@@ -637,9 +622,8 @@ define void @zext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -651,11 +635,10 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: zext_v16i8_v16i32:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
-; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
@@ -665,9 +648,8 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v16i8_v16i32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -722,9 +704,8 @@ define void @zext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -751,9 +732,8 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v8i8_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -766,9 +746,8 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
 ; CHECK-LABEL: zext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -804,9 +783,8 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <8 x i16> %a to <8 x i32>
@@ -886,9 +864,8 @@ define void @zext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -900,11 +877,10 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: zext_v8i16_v8i64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
-; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
@@ -914,9 +890,8 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v8i16_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -968,9 +943,8 @@ define void @zext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <4 x i32> %a to <4 x i64>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
index 4926684ddc2de7..3c1f6cf05a1270 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
@@ -311,10 +311,7 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -325,10 +322,7 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -719,10 +713,7 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -733,10 +724,7 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -1127,10 +1115,7 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -1141,10 +1126,7 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -1535,10 +1517,7 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -1549,10 +1528,7 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
index 331aab17a4cea7..5b1f3a76ea8816 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
@@ -17,10 +17,7 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smulh_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x i16> undef, i16 8, i64 0
   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -37,10 +34,7 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <16 x i8> %op1 to <16 x i16>
   %2 = sext <16 x i8> %op2 to <16 x i16>
@@ -152,10 +146,7 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i16> %op1 to <4 x i32>
   %2 = sext <4 x i16> %op2 to <4 x i32>
@@ -170,10 +161,7 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i16> %op1 to <8 x i32>
   %2 = sext <8 x i16> %op2 to <8 x i32>
@@ -283,10 +271,7 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i32> %op1 to <2 x i64>
   %2 = sext <2 x i32> %op2 to <2 x i64>
@@ -301,10 +286,7 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i32> %op1 to <4 x i64>
   %2 = sext <4 x i32> %op2 to <4 x i64>
@@ -414,10 +396,7 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %insert = insertelement <1 x i128> undef, i128 64, i128 0
   %splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer
@@ -434,10 +413,7 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i64> %op1 to <2 x i128>
   %2 = sext <2 x i64> %op2 to <2 x i128>
@@ -552,10 +528,7 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umulh_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i8> %op1 to <8 x i16>
   %2 = zext <8 x i8> %op2 to <8 x i16>
@@ -570,10 +543,7 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <16 x i8> %op1 to <16 x i16>
   %2 = zext <16 x i8> %op2 to <16 x i16>
@@ -686,10 +656,7 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i16> %op1 to <4 x i32>
   %2 = zext <4 x i16> %op2 to <4 x i32>
@@ -704,10 +671,7 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i16> %op1 to <8 x i32>
   %2 = zext <8 x i16> %op2 to <8 x i32>
@@ -817,10 +781,7 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i32> %op1 to <2 x i64>
   %2 = zext <2 x i32> %op2 to <2 x i64>
@@ -835,10 +796,7 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i32> %op1 to <4 x i64>
   %2 = zext <4 x i32> %op2 to <4 x i64>
@@ -950,10 +908,7 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <1 x i64> %op1 to <1 x i128>
   %2 = zext <1 x i64> %op2 to <1 x i128>
@@ -968,10 +923,7 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i64> %op1 to <2 x i128>
   %2 = zext <2 x i64> %op2 to <2 x i128>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
index 752c2cd34bfe48..0e8479abc8efeb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
@@ -38,7 +38,6 @@ define i8 @uaddv_v32i8(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <32 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
@@ -55,7 +54,6 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.b, z1.b, z0.b
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.b
 ; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v64i8:
@@ -64,7 +62,6 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.b
 ; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <64 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op)
@@ -78,7 +75,6 @@ define i8 @uaddv_v128i8(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <128 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op)
@@ -92,7 +88,6 @@ define i8 @uaddv_v256i8(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <256 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op)
@@ -128,7 +123,6 @@ define i16 @uaddv_v16i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <16 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
@@ -145,7 +139,6 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.h, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.h
 ; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v32i16:
@@ -154,7 +147,6 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.h
 ; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op)
@@ -168,7 +160,6 @@ define i16 @uaddv_v64i16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <64 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op)
@@ -182,7 +173,6 @@ define i16 @uaddv_v128i16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <128 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op)
@@ -218,7 +208,6 @@ define i32 @uaddv_v8i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <8 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
@@ -235,7 +224,6 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.s, z1.s, z0.s
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.s
 ; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v16i32:
@@ -244,7 +232,6 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.s
 ; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op)
@@ -258,7 +245,6 @@ define i32 @uaddv_v32i32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <32 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op)
@@ -272,7 +258,6 @@ define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <64 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op)
@@ -283,7 +268,6 @@ define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @uaddv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uaddv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
@@ -627,7 +611,6 @@ define i32 @smaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @smaxv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smaxv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> %a)
@@ -639,7 +622,6 @@ define i64 @smaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -973,7 +955,6 @@ define i32 @sminv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @sminv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sminv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> %a)
@@ -985,7 +966,6 @@ define i64 @sminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1319,7 +1299,6 @@ define i32 @umaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @umaxv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umaxv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
@@ -1331,7 +1310,6 @@ define i64 @umaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1665,7 +1643,6 @@ define i32 @uminv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @uminv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uminv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> %a)
@@ -1677,7 +1654,6 @@ define i64 @uminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
index 2d78945399176e..5a0ce6ed1d13e0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
@@ -31,8 +31,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v8i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -57,13 +55,10 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_256-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_256-NEXT:    mls v0.8b, v3.8b, v1.8b
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v8i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
@@ -88,7 +83,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_512-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_512-NEXT:    mls v0.8b, v3.8b, v1.8b
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -122,8 +116,6 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v16i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -141,13 +133,10 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    splice z3.h, p0, z3.h, z2.h
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z3.b, z3.b
 ; VBITS_GE_256-NEXT:    mls v0.16b, v2.16b, v1.16b
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v16i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
@@ -157,7 +146,6 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; VBITS_GE_512-NEXT:    mls v0.16b, v2.16b, v1.16b
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -359,28 +347,22 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v8i16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_256-NEXT:    mls v0.8h, v2.8h, v1.8h
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v8i16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    mls v0.8h, v2.8h, v1.8h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -534,12 +516,9 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -550,12 +529,9 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -681,12 +657,9 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -698,12 +671,9 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -849,8 +819,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v8i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -875,13 +843,10 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_256-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_256-NEXT:    mls v0.8b, v3.8b, v1.8b
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v8i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
@@ -906,7 +871,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_512-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_512-NEXT:    mls v0.8b, v3.8b, v1.8b
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -940,8 +904,6 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v16i8:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -959,13 +921,10 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    splice z3.h, p0, z3.h, z2.h
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z3.b, z3.b
 ; VBITS_GE_256-NEXT:    mls v0.16b, v2.16b, v1.16b
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v16i8:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
@@ -975,7 +934,6 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; VBITS_GE_512-NEXT:    mls v0.16b, v2.16b, v1.16b
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -1177,28 +1135,22 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v8i16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_256-NEXT:    mls v0.8h, v2.8h, v1.8h
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v8i16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    mls v0.8h, v2.8h, v1.8h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -1352,12 +1304,9 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.2s, v2.2s, v1.2s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -1368,12 +1317,9 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -1499,12 +1445,9 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -1516,12 +1459,9 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index 37396ba7011be4..c0401bdb153317 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -401,7 +401,6 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_
 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -422,7 +421,6 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 ; VBITS_GE_256-LABEL: select_v8i64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_256-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d
 ; VBITS_GE_256-NEXT:    mov z0.d, x8
@@ -441,7 +439,6 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 ;
 ; VBITS_GE_512-LABEL: select_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_512-NEXT:    ptrue p0.d
 ; VBITS_GE_512-NEXT:    mov z0.d, x8
@@ -462,7 +459,6 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 ; CHECK-LABEL: select_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -483,7 +479,6 @@ define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
 ; CHECK-LABEL: select_v32i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
index 573fe3d8b8a779..445736587fc3ac 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
@@ -208,12 +208,10 @@ define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i16> %op1 to <1 x double>
   ret <1 x double> %res
@@ -316,7 +314,6 @@ define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
@@ -342,7 +339,6 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i32>, ptr %a
   %res = uitofp <8 x i32> %op1 to <8 x half>
@@ -515,7 +511,6 @@ define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i32> %op1 to <1 x double>
   ret <1 x double> %res
@@ -612,7 +607,6 @@ define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    ucvtf h0, x8
 ; CHECK-NEXT:    ret
@@ -625,11 +619,9 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = uitofp <2 x i64> %op1 to <2 x half>
   ret <2 x half> %res
@@ -643,7 +635,6 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = uitofp <4 x i64> %op1 to <4 x half>
@@ -664,7 +655,6 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f16:
@@ -674,7 +664,6 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x i64>, ptr %a
   %res = uitofp <8 x i64> %op1 to <8 x half>
@@ -721,7 +710,6 @@ define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -747,7 +735,6 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = uitofp <4 x i64> %op1 to <4 x float>
@@ -826,7 +813,6 @@ define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf d0, d0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i64> %op1 to <1 x double>
@@ -1120,12 +1106,10 @@ define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i16> %op1 to <1 x double>
   ret <1 x double> %res
@@ -1234,7 +1218,6 @@ define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
@@ -1260,7 +1243,6 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i32>, ptr %a
   %res = sitofp <8 x i32> %op1 to <8 x half>
@@ -1433,7 +1415,6 @@ define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i32> %op1 to <1 x double>
   ret <1 x double> %res
@@ -1536,7 +1517,6 @@ define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    scvtf h0, x8
 ; CHECK-NEXT:    ret
@@ -1549,11 +1529,9 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sitofp <2 x i64> %op1 to <2 x half>
   ret <2 x half> %res
@@ -1567,7 +1545,6 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = sitofp <4 x i64> %op1 to <4 x half>
@@ -1588,7 +1565,6 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: scvtf_v8i64_v8f16:
@@ -1598,7 +1574,6 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x i64>, ptr %a
   %res = sitofp <8 x i64> %op1 to <8 x half>
@@ -1645,7 +1620,6 @@ define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -1671,7 +1645,6 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = sitofp <4 x i64> %op1 to <4 x float>
@@ -1750,7 +1723,6 @@ define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i64> %op1 to <1 x double>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
index d0585274a43e3c..8c1af6149a4265 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
@@ -14,7 +14,6 @@ define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -27,7 +26,6 @@ define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -103,7 +101,6 @@ define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -116,7 +113,6 @@ define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -192,7 +188,6 @@ define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -205,7 +200,6 @@ define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -280,7 +274,6 @@ define i32 @andv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @andv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
@@ -292,7 +285,6 @@ define i64 @andv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -372,7 +364,6 @@ define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -385,7 +376,6 @@ define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -461,7 +451,6 @@ define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -474,7 +463,6 @@ define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -550,7 +538,6 @@ define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -563,7 +550,6 @@ define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -638,7 +624,6 @@ define i32 @eorv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @eorv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %a)
@@ -650,7 +635,6 @@ define i64 @eorv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -730,7 +714,6 @@ define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -743,7 +726,6 @@ define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -819,7 +801,6 @@ define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -832,7 +813,6 @@ define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -908,7 +888,6 @@ define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -921,7 +900,6 @@ define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -996,7 +974,6 @@ define i32 @orv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @orv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %a)
@@ -1008,7 +985,6 @@ define i64 @orv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
index 55d37d1bda5e4c..c7e53bd0d0b011 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
@@ -16,7 +16,6 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
   ret <16 x i8> %load
@@ -31,7 +30,6 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
   ret <8 x half> %load
@@ -46,7 +44,6 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
   ret <4 x float> %load
@@ -61,7 +58,6 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
   ret <2 x double> %load
@@ -72,13 +68,11 @@ define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x d
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    shl v0.2d, v0.2d, #63
 ; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> %passthru)
   ret <2 x double> %load

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index c22d9e71c51a93..dcc6c273114e9d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -23,7 +23,6 @@ define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov v0.h[1], w8
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, ptr %ap
   %b = load <2 x half>, ptr %bp
@@ -41,7 +40,6 @@ define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, ptr %ap
   %b = load <2 x float>, ptr %bp
@@ -59,7 +57,6 @@ define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %ap
   %b = load <4 x float>, ptr %bp

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
index 6e8d477fc3ad58..5c764d39feb6db 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
@@ -7,10 +7,7 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -20,10 +17,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, %op2
   ret <4 x i32> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
index 4ba34407ff1846..7c0e576564cdb2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -16,7 +16,6 @@ define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %el0 = extractelement <vscale x 4 x i1> %a, i32 0
   %el1 = extractelement <vscale x 4 x i1> %a, i32 1

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
index 82d350f6e28f82..1d2915d658ad27 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
@@ -13,9 +13,7 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
   ret <8 x i8> %res
@@ -25,9 +23,7 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
   ret <16 x i8> %res
@@ -105,9 +101,7 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
   ret <4 x i16> %res
@@ -117,9 +111,7 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
   ret <8 x i16> %res
@@ -197,9 +189,7 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
   ret <2 x i32> %res
@@ -209,9 +199,7 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
   ret <4 x i32> %res
@@ -289,9 +277,7 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -301,9 +287,7 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d53..e903280404a3fb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -9,10 +9,8 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
 ; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 -2, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
   ret <8 x i8> %res
@@ -22,9 +20,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
   ret <16 x i8> %res
@@ -103,9 +99,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
   ret <4 x i16> %res
@@ -115,10 +109,8 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #3
 ; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 -8, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
   ret <8 x i16> %res
@@ -197,10 +189,8 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 -32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
   ret <2 x i32> %res
@@ -210,9 +200,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
   ret <4 x i32> %res
@@ -291,10 +279,8 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #7
 ; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 -128, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
   ret <1 x i64> %res
@@ -305,9 +291,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 749a1866e7192a..17525f4ccc476e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -10,10 +10,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #16
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
 ; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x0]
 ; CHECK-NEXT:    ret
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
index b633057be139ce..5b194961cbbe84 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
@@ -349,7 +349,6 @@ define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
 define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x half> undef, half %a, i64 0
@@ -361,7 +360,6 @@ define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
 define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x half> undef, half %a, i64 0
@@ -372,9 +370,8 @@ define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
 define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <16 x half> undef, half %a, i64 0
@@ -386,19 +383,17 @@ define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v32f16(half %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v32f16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
+; VBITS_GE_256-NEXT:    mov z0.h, h0
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
-; VBITS_GE_256-NEXT:    mov z0.h, h0
 ; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v32f16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    mov z0.h, h0
+; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <32 x half> undef, half %a, i64 0
@@ -410,9 +405,8 @@ define void @splat_v32f16(half %a, ptr %b) #0 {
 define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v64f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <64 x half> undef, half %a, i64 0
@@ -424,9 +418,8 @@ define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v128f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <128 x half> undef, half %a, i64 0
@@ -439,7 +432,6 @@ define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <2 x float> undef, float %a, i64 0
@@ -451,7 +443,6 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0
 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x float> undef, float %a, i64 0
@@ -462,9 +453,8 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0
 define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x float> undef, float %a, i64 0
@@ -476,19 +466,17 @@ define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v16f32(float %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v16f32:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
+; VBITS_GE_256-NEXT:    mov z0.s, s0
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT:    mov z0.s, s0
 ; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v16f32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    mov z0.s, s0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <16 x float> undef, float %a, i64 0
@@ -500,9 +488,8 @@ define void @splat_v16f32(float %a, ptr %b) #0 {
 define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v32f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <32 x float> undef, float %a, i64 0
@@ -514,9 +501,8 @@ define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v64f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <64 x float> undef, float %a, i64 0
@@ -539,7 +525,6 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0)
 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <2 x double> undef, double %a, i64 0
@@ -550,9 +535,8 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0)
 define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x double> undef, double %a, i64 0
@@ -564,19 +548,17 @@ define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v8f64(double %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v8f64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
+; VBITS_GE_256-NEXT:    mov z0.d, d0
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT:    mov z0.d, d0
 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v8f64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
-; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    mov z0.d, d0
+; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <8 x double> undef, double %a, i64 0
@@ -588,9 +570,8 @@ define void @splat_v8f64(double %a, ptr %b) #0 {
 define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v16f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <16 x double> undef, double %a, i64 0
@@ -602,9 +583,8 @@ define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v32f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <32 x double> undef, double %a, i64 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
index aef19d23109b4f..8f65308ecbe86f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -404,7 +404,6 @@ define <8 x i1> @no_warn_dropped_scalable(ptr %in) #0 {
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   br label %bb1

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
index 8dc45eadce6f3b..7e9cdbfe64a6ea 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
@@ -15,7 +15,6 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = trunc <16 x i16> %a to <16 x i8>
@@ -102,7 +101,6 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = trunc <8 x i32> %a to <8 x i8>
@@ -121,7 +119,6 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v16i32_v16i8:
@@ -130,7 +127,6 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <16 x i32>, ptr %in
   %b = trunc <16 x i32> %a to <16 x i8>
@@ -185,7 +181,6 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = trunc <8 x i32> %a to <8 x i16>
@@ -273,7 +268,6 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i8>
@@ -293,7 +287,6 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v8i64_v8i8:
@@ -303,7 +296,6 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <8 x i64>, ptr %in
   %b = trunc <8 x i64> %a to <8 x i8>
@@ -318,7 +310,6 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <16 x i64>, ptr %in
   %b = trunc <16 x i64> %a to <16 x i8>
@@ -356,7 +347,6 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i16>
@@ -375,7 +365,6 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
-; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v8i64_v8i16:
@@ -384,7 +373,6 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <8 x i64>, ptr %in
   %b = trunc <8 x i64> %a to <8 x i16>
@@ -439,7 +427,6 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i32>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
index 20659cde83ee00..0c32a6212922c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
@@ -24,7 +24,6 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x0]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -33,7 +32,6 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x0]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -42,7 +40,6 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x0]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -53,7 +50,6 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -79,7 +75,6 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x1]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -88,7 +83,6 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -97,7 +91,6 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -108,7 +101,6 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -135,7 +127,6 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
-; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -156,7 +147,6 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -177,7 +167,6 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -198,7 +187,6 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -225,7 +213,6 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
-; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_poison_value:
@@ -244,7 +231,6 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_poison_value:
@@ -263,7 +249,6 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_poison_value:
@@ -282,7 +267,6 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -297,7 +281,6 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x1]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_op1_poison:
@@ -306,7 +289,6 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_op1_poison:
@@ -315,7 +297,6 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_op1_poison:
@@ -326,7 +307,6 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op2 = load <8 x i8>, ptr %b
   %1 = shufflevector <8 x i8> poison, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
@@ -354,7 +334,6 @@ define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b)
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -382,7 +361,6 @@ define <8 x i8> @shuffle_index_size_op1_maxhw(ptr %a, ptr %b) "target-features"=
 ; CHECK-NEXT:    ldr d1, [x0]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; CHECK-NEXT:    tbl z0.b, { z1.b }, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -480,7 +458,6 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr q1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
 ; SVE2_128-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
-; SVE2_128-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -496,7 +473,6 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_128_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
-; SVE2_128_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -512,7 +488,6 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
-; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -530,7 +505,6 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
-; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %op2 = load <8 x i16>, ptr %b

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index c77861509e4a1d..6a66a841570d3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -16,7 +16,6 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    frintx h0, h0
 ; CHECK-NEXT:    frintx h1, h1
@@ -281,7 +280,6 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x8, s0
 ; CHECK-NEXT:    fmov d0, x8
@@ -571,8 +569,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: llrint_v4i64_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
@@ -597,10 +593,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-LABEL: llrint_v8i64_v8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
@@ -639,14 +631,6 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
 ; CHECK-LABEL: llrint_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d, vl2
-; CHECK-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-NEXT:    splice z4.d, p1, z4.d, z5.d
@@ -719,14 +703,6 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p1.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-NEXT:    // kill: def $q5 killed $q5 def $z5
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    splice z2.d, p1, z2.d, z3.d

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 6a97e7ad64bf3d..da21b44014b534 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -26,7 +26,6 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    frintx h0, h0
 ; CHECK-i32-NEXT:    frintx h1, h1
@@ -34,12 +33,10 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, h1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f16:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[1]
 ; CHECK-i64-NEXT:    frintx h0, h0
 ; CHECK-i64-NEXT:    frintx h1, h1
@@ -514,7 +511,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v1f32:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    frintx s0, s0
 ; CHECK-i64-NEXT:    fcvtzs x8, s0
 ; CHECK-i64-NEXT:    fmov d0, x8
@@ -573,8 +569,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; CHECK-i32-LABEL: lrint_v8f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
 ; CHECK-i32-NEXT:    movprfx z2, z0
@@ -640,10 +634,6 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 ; CHECK-i32-LABEL: lrint_v16f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
@@ -773,14 +763,6 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 ; CHECK-i32-NEXT:    .cfi_offset w26, -64
 ; CHECK-i32-NEXT:    .cfi_offset w27, -80
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
 ; CHECK-i32-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1048,7 +1030,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, d1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f64:
@@ -1065,8 +1046,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v4f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    movprfx z1, z0
@@ -1087,8 +1066,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v4f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p0.d, vl2
-; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
@@ -1113,10 +1090,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v8f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
@@ -1150,10 +1123,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v8f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p0.d, vl2
-; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
@@ -1192,14 +1161,6 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v16f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1260,14 +1221,6 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v16f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p1.d, vl2
-; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-i64-NEXT:    splice z4.d, p1, z4.d, z5.d
@@ -1333,14 +1286,6 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v32f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
-; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
-; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1469,14 +1414,6 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; CHECK-i64-NEXT:    .cfi_offset w30, -8
 ; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    ptrue p1.d, vl2
-; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
-; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 def $z7
-; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 def $z6
-; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 def $z4
-; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 def $z5
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i64-NEXT:    splice z2.d, p1, z2.d, z3.d

diff  --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index b0b6a6a530ddab..09e22b72d0ef07 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -12,9 +12,7 @@ define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {
 ; CHECK-LABEL: fadda_nxv2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv2f16(half %init, <vscale x 2 x half> %a)
   ret half %res
@@ -24,9 +22,7 @@ define half @fadda_nxv4f16(half %init, <vscale x 4 x half> %a) {
 ; CHECK-LABEL: fadda_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv4f16(half %init, <vscale x 4 x half> %a)
   ret half %res
@@ -36,9 +32,7 @@ define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fadda_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv8f16(half %init, <vscale x 8 x half> %a)
   ret half %res
@@ -60,7 +54,6 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT:    st1h { z2.d }, p1, [sp, #3, mul vl]
 ; CHECK-NEXT:    ld1h { z2.h }, p0/z, [sp]
 ; CHECK-NEXT:    fadda h0, p0, h0, z2.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -76,7 +69,6 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
@@ -104,7 +96,6 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK-LABEL: fadda_nxv12f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
 ; CHECK-NEXT:    uunpklo z0.s, z1.h
@@ -121,9 +112,7 @@ define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) {
 ; CHECK-LABEL: fadda_nxv2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.nxv2f32(float %init, <vscale x 2 x float> %a)
   ret float %res
@@ -133,9 +122,7 @@ define float @fadda_nxv4f32(float %init, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fadda_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %init, <vscale x 4 x float> %a)
   ret float %res
@@ -145,9 +132,7 @@ define double @fadda_nxv2f64(double %init, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fadda_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.nxv2f64(double %init, <vscale x 2 x double> %a)
   ret double %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
index 15ee6a02a16393..d598f46db0d3c8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
@@ -76,7 +76,6 @@ define half @fmaxv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -87,7 +86,6 @@ define half @fmaxv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -98,7 +96,6 @@ define half @fmaxv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -109,7 +106,6 @@ define float @fmaxv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -120,7 +116,6 @@ define float @fmaxv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -131,7 +126,6 @@ define double @fmaxv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -144,7 +138,6 @@ define half @fminv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -155,7 +148,6 @@ define half @fminv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -166,7 +158,6 @@ define half @fminv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -177,7 +168,6 @@ define float @fminv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -188,7 +178,6 @@ define float @fminv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -199,7 +188,6 @@ define double @fminv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -215,7 +203,6 @@ define half @fmaximumv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -226,7 +213,6 @@ define half @fmaximumv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -237,7 +223,6 @@ define half @fmaximumv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -248,7 +233,6 @@ define float @fmaximumv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmaximum.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -259,7 +243,6 @@ define float @fmaximumv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmaximum.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -270,7 +253,6 @@ define double @fmaximumv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmaximum.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -283,7 +265,6 @@ define half @fminimumv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -294,7 +275,6 @@ define half @fminimumv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -305,7 +285,6 @@ define half @fminimumv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -316,7 +295,6 @@ define float @fminimumv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -327,7 +305,6 @@ define float @fminimumv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -338,7 +315,6 @@ define double @fminimumv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fminimum.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -351,7 +327,6 @@ define float @fadd_reduct_reassoc_v4v8f32(<vscale x 4 x float> %a, <vscale x 8 x
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
 ; CHECK-NEXT:    faddv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.0, <vscale x 4 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.0, <vscale x 8 x float> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll b/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
index a748cf732e0906..b8d7ecff02d115 100644
--- a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
@@ -5,7 +5,6 @@ define i8 @uaddv_zexti8_nxv16i1(<vscale x 16 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv16i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 16 x i1> %v to <vscale x 16 x i8>
@@ -17,7 +16,6 @@ define i8 @uaddv_zexti8_nxv8i1(<vscale x 8 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv8i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i8>
@@ -29,7 +27,6 @@ define i16 @uaddv_zexti16_nxv8i1(<vscale x 8 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv8i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i16>
@@ -41,7 +38,6 @@ define i8 @uaddv_zexti8_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i8>
@@ -53,7 +49,6 @@ define i16 @uaddv_zexti16_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i16>
@@ -65,7 +60,6 @@ define i32 @uaddv_zexti32_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti32_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
@@ -77,7 +71,6 @@ define i8 @uaddv_zexti8_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i8>
@@ -89,7 +82,6 @@ define i16 @uaddv_zexti16_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i16>
@@ -101,7 +93,6 @@ define i32 @uaddv_zexti32_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti32_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>

diff  --git a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
index 73bbee094827e7..ac112aa170f290 100644
--- a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
+++ b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
@@ -29,7 +29,6 @@ define <vscale x 8 x i16> @eorv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i
 define <vscale x 2 x double> @fadda_zero_fill(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: fadda_zero_fill:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 ; CHECK-NEXT:    ret
   %t1 = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a)

diff  --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index 433ddbd4a261b2..d1c559736ee2be 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -7,7 +7,6 @@ define <16 x i8> @v16i8() #0 {
 ; CHECK-LABEL: v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.b, #0, #1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
 }
@@ -16,7 +15,6 @@ define <8 x i16> @v8i16() #0 {
 ; CHECK-LABEL: v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.h, #0, #1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 }
@@ -25,7 +23,6 @@ define <4 x i32> @v4i32() #0 {
 ; CHECK-LABEL: v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 }
@@ -34,7 +31,6 @@ define <2 x i64> @v2i64() #0 {
 ; CHECK-LABEL: v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.d, #0, #1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <2 x i64> <i64 0, i64 1>
 }
@@ -45,7 +41,6 @@ define <8 x i8> @v8i8() #0 {
 ; CHECK-LABEL: v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.b, #0, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
 }
@@ -54,7 +49,6 @@ define <4 x i16> @v4i16() #0 {
 ; CHECK-LABEL: v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.h, #0, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i16> <i16 0, i16 1, i16 2, i16 3>
 }
@@ -63,7 +57,6 @@ define <2 x i32> @v2i32() #0 {
 ; CHECK-LABEL: v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <2 x i32> <i32 0, i32 1>
 }
@@ -75,7 +68,6 @@ define <4 x i32> @v4i32_non_zero_non_one() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #2
 ; CHECK-NEXT:    orr z0.s, z0.s, #0x1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 }
@@ -85,7 +77,6 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
 ; CHECK-LABEL: v4i32_neg_immediates:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #-1, #-2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
 }
@@ -96,7 +87,6 @@ define <4 x i32> @v4i32_out_range_start() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
 ; CHECK-NEXT:    add z0.s, z0.s, #16 // =0x10
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
 }
@@ -107,7 +97,6 @@ define <4 x i32> @v4i32_out_range_step() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16 // =0x10
 ; CHECK-NEXT:    index z0.s, #0, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 0, i32 16, i32 32, i32 48>
 }
@@ -119,7 +108,6 @@ define <4 x i32> @v4i32_out_range_start_step() #0 {
 ; CHECK-NEXT:    mov w8, #16 // =0x10
 ; CHECK-NEXT:    index z0.s, #0, w8
 ; CHECK-NEXT:    add z0.s, z0.s, #16 // =0x10
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 16, i32 32, i32 48, i32 64>
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index 7344964f13bbad..608c5ac4fb6745 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -82,7 +82,6 @@ define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat
 ; CHECK-LABEL: test_lane0_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl1
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
@@ -247,7 +246,6 @@ define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a,
 define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x half> undef, half %a, i32 0
   ret <vscale x 8 x half> %b
@@ -256,7 +254,6 @@ define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
 define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x half> undef, half %a, i32 0
   ret <vscale x 4 x half> %b
@@ -265,7 +262,6 @@ define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
 define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x half> undef, half %a, i32 0
   ret <vscale x 2 x half> %b
@@ -274,7 +270,6 @@ define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
 define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 8 x bfloat> %b
@@ -283,7 +278,6 @@ define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
 define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 4 x bfloat> %b
@@ -292,7 +286,6 @@ define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
 define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 2 x bfloat> %b
@@ -301,7 +294,6 @@ define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
 define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x float> undef, float %a, i32 0
   ret <vscale x 4 x float> %b
@@ -310,7 +302,6 @@ define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
 define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x float> undef, float %a, i32 0
   ret <vscale x 2 x float> %b
@@ -319,7 +310,6 @@ define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
 define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
   ret <vscale x 2 x double> %b
@@ -449,7 +439,6 @@ define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, p0/m, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z0.d, z0.d, #0x1
@@ -520,7 +509,6 @@ define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1
 ; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z1.d, x8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
 ; CHECK-NEXT:    mov z0.d, p2/m, x0

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 5d1d7cf65c09e4..c207738d9b6562 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -5,7 +5,6 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i6
 ; CHECK-LABEL: insert_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
@@ -39,7 +38,6 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i3
 ; CHECK-LABEL: insert_v4i32_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
@@ -73,7 +71,6 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i1
 ; CHECK-LABEL: insert_v8i16_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
@@ -107,7 +104,6 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i
 ; CHECK-LABEL: insert_v16i8_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
@@ -195,7 +191,6 @@ define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, ptr %out) uwt
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    str q1, [sp, #32]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #3, mul vl]
@@ -536,7 +531,6 @@ define <vscale x 8 x bfloat> @insert_nxv8bf16_v8bf16(<vscale x 8 x bfloat> %sv0,
 ; CHECK-LABEL: insert_nxv8bf16_v8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> %sv0, <8 x bfloat> %v1, i64 0)

diff  --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
index 8c1b5225b7f257..9ca0ae70fa6f8c 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
@@ -147,7 +147,6 @@ define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
   ret i8 %res
@@ -159,7 +158,6 @@ define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
   ret i16 %res
@@ -171,7 +169,6 @@ define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
   ret i32 %res
@@ -423,7 +420,6 @@ define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
   ret i8 %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
index 02d172fbc9dbfd..581b1ed7a989a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
@@ -14,9 +14,7 @@ define i32 @incb(i32 %a) {
 ;
 ; CHECK-LABEL: incb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incb x0, vl5
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
   %conv = trunc i64 %cnt to i32
@@ -33,9 +31,7 @@ define i32 @incb_mul(i32 %a) {
 ;
 ; CHECK-LABEL: incb_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incb x0, vl8, mul #4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
   %conv = trunc i64 %cnt to i32
@@ -57,9 +53,7 @@ define i32 @decb(i32 %a) {
 ;
 ; CHECK-LABEL: decb:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decb x0, vl6
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
   %conv = trunc i64 %cnt to i32
@@ -76,9 +70,7 @@ define i32 @decb_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decb_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decb x0, vl6, mul #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
   %conv = trunc i64 %cnt to i32
@@ -98,9 +90,7 @@ define i32 @inch(i32 %a) {
 ;
 ; CHECK-LABEL: inch:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, #16
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -118,9 +108,7 @@ define i32 @inch_mul(i32 %a) {
 ;
 ; CHECK-LABEL: inch_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, vl8, mul #5
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
   %conv = trunc i64 %cnt to i32
@@ -142,9 +130,7 @@ define i32 @dech(i32 %a) {
 ;
 ; CHECK-LABEL: dech:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0, vl1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
   %conv = trunc i64 %cnt to i32
@@ -162,9 +148,7 @@ define i32 @dech_mul(i32 %a) {
 ;
 ; CHECK-LABEL: dech_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0, vl16, mul #7
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
   %conv = trunc i64 %cnt to i32
@@ -186,9 +170,7 @@ define i32 @incw(i32 %a) {
 ;
 ; CHECK-LABEL: incw:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, #16
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -206,9 +188,7 @@ define i32 @incw_mul(i32 %a) {
 ;
 ; CHECK-LABEL: incw_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, vl32, mul #12
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
   %conv = trunc i64 %cnt to i32
@@ -230,9 +210,7 @@ define i32 @decw(i32 %a) {
 ;
 ; CHECK-LABEL: decw:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0, vl64
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -249,9 +227,7 @@ define i32 @decw_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decw_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0, vl128, mul #16
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
   %conv = trunc i64 %cnt to i32
@@ -271,9 +247,7 @@ define i32 @incd(i32 %base) {
 ;
 ; CHECK-LABEL: incd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, vl64
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -291,9 +265,7 @@ define i32 @incd_mul(i32 %base) {
 ;
 ; CHECK-LABEL: incd_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, vl64, mul #15
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -315,9 +287,7 @@ define i32 @decd(i32 %a) {
 ;
 ; CHECK-LABEL: decd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0, #16
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -335,9 +305,7 @@ define i32 @decd_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decd_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0, vl2, mul #9
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
   %conv = trunc i64 %cnt to i32

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
index bf113a7cabc15a..83f1fc5d14a9e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -82,7 +82,6 @@ define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
 define <vscale x 8 x half> @dup_f16(half %b) {
 ; CHECK-LABEL: dup_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
@@ -92,7 +91,6 @@ define <vscale x 8 x half> @dup_f16(half %b) {
 define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
 ; CHECK-LABEL: dup_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
@@ -111,7 +109,6 @@ define <vscale x 8 x half> @dup_imm_f16(half %b) {
 define <vscale x 4 x float> @dup_f32(float %b) {
 ; CHECK-LABEL: dup_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
@@ -130,7 +127,6 @@ define <vscale x 4 x float> @dup_imm_f32(float %b) {
 define <vscale x 2 x double> @dup_f64(double %b) {
 ; CHECK-LABEL: dup_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
@@ -149,7 +145,7 @@ define <vscale x 2 x double> @dup_imm_f64(double %b) {
 define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
 ; CHECK-LABEL: dup_fmov_imm_f32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1109917696
+; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
@@ -159,7 +155,7 @@ define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
 define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
 ; CHECK-LABEL: dup_fmov_imm_f32_4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1109917696
+; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
@@ -169,7 +165,7 @@ define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
 define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
 ; CHECK-LABEL: dup_fmov_imm_f64_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4631107791820423168
+; CHECK-NEXT:    mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
index 95de90078d3416..95ec7b2b396599 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -8,9 +8,7 @@
 define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fadda_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %pg,
                                                    half %init,
@@ -21,9 +19,7 @@ define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a
 define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fadda_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %pg,
                                                     float %init,
@@ -34,9 +30,7 @@ define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float>
 define double @fadda_f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fadda_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg,
                                                      double %init,
@@ -52,7 +46,6 @@ define half @faddv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: faddv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -63,7 +56,6 @@ define float @faddv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: faddv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -74,7 +66,6 @@ define double @faddv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: faddv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
@@ -89,7 +80,6 @@ define half @fmaxnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxnmv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
@@ -100,7 +90,6 @@ define float @fmaxnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxnmv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
@@ -111,7 +100,6 @@ define double @fmaxnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxnmv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
@@ -126,7 +114,6 @@ define half @fmaxv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -137,7 +124,6 @@ define float @fmaxv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -148,7 +134,6 @@ define double @fmaxv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
@@ -163,7 +148,6 @@ define half @fminnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminnmv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
@@ -174,7 +158,6 @@ define float @fminnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminnmv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
@@ -185,7 +168,6 @@ define double @fminnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminnmv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
@@ -200,7 +182,6 @@ define half @fminv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -211,7 +192,6 @@ define float @fminv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -222,7 +202,6 @@ define double @fminv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index 3096b509884765..3e6f5a0df6fa17 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -596,8 +596,6 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
 define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
 ; CHECK-LABEL: dupq_f32_repeat_complex:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
@@ -613,8 +611,6 @@ define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %
 define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
 ; CHECK-LABEL: dupq_f16_repeat_complex:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
@@ -635,8 +631,6 @@ define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ;
 ; SVE2-LABEL: ext_i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #255
 ; SVE2-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
@@ -653,8 +647,6 @@ define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ;
 ; SVE2-LABEL: ext_i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #0
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
@@ -671,8 +663,6 @@ define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ;
 ; SVE2-LABEL: ext_i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
@@ -689,8 +679,6 @@ define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ;
 ; SVE2-LABEL: ext_i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
@@ -707,8 +695,6 @@ define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x b
 ;
 ; SVE2-LABEL: ext_bf16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
@@ -725,8 +711,6 @@ define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 ;
 ; SVE2-LABEL: ext_f16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
@@ -743,8 +727,6 @@ define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x floa
 ;
 ; SVE2-LABEL: ext_f32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
@@ -761,8 +743,6 @@ define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x do
 ;
 ; SVE2-LABEL: ext_f64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #40
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
@@ -1154,8 +1134,6 @@ define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 ;
 ; SVE2-LABEL: splice_i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1172,8 +1150,6 @@ define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 ;
 ; SVE2-LABEL: splice_i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
@@ -1190,8 +1166,6 @@ define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 ;
 ; SVE2-LABEL: splice_i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1208,8 +1182,6 @@ define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 ; SVE2-LABEL: splice_i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
@@ -1226,8 +1198,6 @@ define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
 ;
 ; SVE2-LABEL: splice_bf16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
@@ -1244,8 +1214,6 @@ define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
 ;
 ; SVE2-LABEL: splice_f16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
@@ -1262,8 +1230,6 @@ define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
 ;
 ; SVE2-LABEL: splice_f32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
@@ -1280,8 +1246,6 @@ define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
 ;
 ; SVE2-LABEL: splice_f64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index 5127fa7e934803..a51c5de893f258 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -153,7 +153,6 @@ define <vscale x 16 x i1> @chained_reinterpret() {
 define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    ret
@@ -166,7 +165,6 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
@@ -179,7 +177,6 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_q:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
index 310fba47780258..bedef200feeca1 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -173,7 +173,6 @@ define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
 define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
 ; CHECK-LABEL: insr_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    insr z0.h, h1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
@@ -183,7 +182,6 @@ define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
 define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
 ; CHECK-LABEL: insr_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    insr z0.h, h1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
@@ -193,7 +191,6 @@ define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0
 define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: insr_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    insr z0.s, s1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
@@ -203,7 +200,6 @@ define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
 ; CHECK-LABEL: insr_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    insr z0.d, d1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
index f4ab2025f71822..2beda0d664f5ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
@@ -92,9 +92,7 @@ define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqdecb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -103,7 +101,6 @@ define i32 @sqdecb_n32_i32(i32 %a) {
 define i64 @sqdecb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
@@ -128,9 +125,7 @@ define i64 @sqdecb_n64(i64 %a) {
 define i32 @sqdech_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl5, mul #6
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -139,7 +134,6 @@ define i32 @sqdech_n32_i32(i32 %a) {
 define i64 @sqdech_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 3, i32 4)
@@ -164,9 +158,7 @@ define i64 @sqdech_n64(i64 %a) {
 define i32 @sqdecw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl7, mul #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -175,7 +167,6 @@ define i32 @sqdecw_n32_i32(i32 %a) {
 define i64 @sqdecw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 3, i32 4)
@@ -200,9 +191,7 @@ define i64 @sqdecw_n64(i64 %a) {
 define i32 @sqdecd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl16, mul #10
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -211,7 +200,6 @@ define i32 @sqdecd_n32_i32(i32 %a) {
 define i64 @sqdecd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 3, i32 4)
@@ -236,9 +224,7 @@ define i64 @sqdecd_n64(i64 %a) {
 define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -247,7 +233,6 @@ define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -259,9 +244,7 @@ define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -270,7 +253,6 @@ define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -282,9 +264,7 @@ define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -293,7 +273,6 @@ define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -305,9 +284,7 @@ define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -316,7 +293,6 @@ define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqdecp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
index aa3403b714cbcc..449ad444198b75 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
@@ -92,9 +92,7 @@ define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqincb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -103,7 +101,6 @@ define i32 @sqincb_n32_i32(i32 %a) {
 define i64 @sqincb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
@@ -128,9 +125,7 @@ define i64 @sqincb_n64(i64 %a) {
 define i32 @sqinch_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl5, mul #6
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -139,7 +134,6 @@ define i32 @sqinch_n32_i32(i32 %a) {
 define i64 @sqinch_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 3, i32 4)
@@ -164,9 +158,7 @@ define i64 @sqinch_n64(i64 %a) {
 define i32 @sqincw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl7, mul #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -175,7 +167,6 @@ define i32 @sqincw_n32_i32(i32 %a) {
 define i64 @sqincw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 3, i32 4)
@@ -200,9 +191,7 @@ define i64 @sqincw_n64(i64 %a) {
 define i32 @sqincd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl16, mul #10
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -211,7 +200,6 @@ define i32 @sqincd_n32_i32(i32 %a) {
 define i64 @sqincd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 3, i32 4)
@@ -236,9 +224,7 @@ define i64 @sqincd_n64(i64 %a) {
 define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -247,7 +233,6 @@ define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -259,9 +244,7 @@ define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -270,7 +253,6 @@ define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -282,9 +264,7 @@ define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -293,7 +273,6 @@ define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -305,9 +284,7 @@ define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -316,7 +293,6 @@ define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqincp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index 579358ecb887ef..b8d171dac45e2f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -15,8 +15,6 @@
 define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 2, i64 0
@@ -30,9 +28,7 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -46,9 +42,7 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
 define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #-18
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -18, i64 0
@@ -62,9 +56,7 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #16
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 16, i64 0
@@ -78,8 +70,6 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16, i64 0
@@ -93,8 +83,6 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14, i64 0
@@ -112,8 +100,6 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 2, i64 0
@@ -127,8 +113,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 2, i64 0
@@ -146,8 +130,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 4, i64 0
@@ -161,8 +143,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 6, i64 0
@@ -180,8 +160,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 8, i64 0
@@ -195,8 +173,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 10, i64 0
@@ -214,9 +190,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -231,10 +204,7 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -249,10 +219,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -267,10 +234,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #-27
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -27, i64 0
@@ -285,10 +249,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #24
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 24, i64 0
@@ -303,9 +264,6 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24, i64 0
@@ -320,9 +278,6 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21, i64 0
@@ -341,9 +296,6 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 6, i64 0
@@ -358,9 +310,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 9, i64 0
@@ -379,9 +328,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 12, i64 0
@@ -396,9 +342,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 15, i64 0
@@ -417,9 +360,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 18, i64 0
@@ -434,9 +374,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -3, i64 0
@@ -455,10 +392,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -474,11 +407,7 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -494,11 +423,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #6
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 6, i64 0
@@ -514,11 +439,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 7, i64 0
@@ -536,12 +457,8 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov x9, #-576 // =0xfffffffffffffdc0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
@@ -563,12 +480,8 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #512 // =0x200
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
@@ -588,10 +501,6 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32, i64 0
@@ -607,10 +516,6 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28, i64 0
@@ -630,10 +535,6 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 8, i64 0
@@ -649,10 +550,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 12, i64 0
@@ -672,10 +569,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 16, i64 0
@@ -691,10 +584,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 20, i64 0
@@ -714,10 +603,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 24, i64 0
@@ -733,10 +618,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28, i64 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index c8fc8d7a70cc68..8a7a5b7be34a90 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -9,8 +9,6 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -28,8 +26,6 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -43,8 +39,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -62,8 +56,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -77,8 +69,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -96,8 +86,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -111,8 +99,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -130,9 +116,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -151,9 +134,6 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -168,9 +148,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -189,9 +166,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -206,9 +180,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -227,9 +198,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -244,9 +212,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -265,10 +230,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -288,10 +249,6 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -307,10 +264,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -330,10 +283,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -349,10 +298,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -372,10 +317,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -391,10 +332,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index b09baa6bf7e0ac..5aa0fd7eb7e453 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -9,8 +9,6 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
@@ -27,8 +25,6 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
@@ -41,8 +37,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
@@ -55,8 +49,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st2h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -73,8 +65,6 @@ define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
@@ -87,8 +77,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
@@ -105,8 +93,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
@@ -119,8 +105,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
@@ -133,8 +117,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -151,9 +133,6 @@ define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -171,9 +150,6 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -187,9 +163,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -203,9 +176,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st3h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -223,9 +193,6 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -239,9 +206,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -259,9 +223,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -275,9 +236,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -291,9 +249,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -311,10 +266,6 @@ define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -333,10 +284,6 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -351,10 +298,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -369,10 +312,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st4h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -391,10 +330,6 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -409,10 +344,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -431,10 +362,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -449,10 +376,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
@@ -467,10 +390,6 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x ptr> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr> %v0,

diff  --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
index 47758893ce7117..a326da2b20f4a0 100644
--- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
@@ -11,10 +11,10 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x
 define void @foo1(ptr %outval, <vscale x 2 x i1> %pred, ptr %inptr) {
 ; CHECK-LABEL: foo1:
 ; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1]
-; CHECK-NEXT: faddv d2, p0, z0.d
-; CHECK-NEXT: faddv d0, p0, z1.d
-; CHECK-NEXT: mov v2.d[1], v0.d[0]
-; CHECK-NEXT: str q2, [x0]
+; CHECK-NEXT: faddv d0, p0, z0.d
+; CHECK-NEXT: faddv d1, p0, z1.d
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str q0, [x0]
   %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1
   %1 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> %pred, ptr nonnull %inptr)
   %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
index bcb878ad744bb4..0853f80a755407 100644
--- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
@@ -10,7 +10,6 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind {
 ; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0
   ret <4 x i32> %load
@@ -21,7 +20,6 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v1.4s, v1.4s, #31
 ; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0

diff  --git a/llvm/test/CodeGen/AArch64/sve-pr62151.ll b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
index 5ed34f14a0b140..36594baa249164 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr62151.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
@@ -7,7 +7,6 @@ define i32 @build_interpolation(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s0

diff  --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 8438e9d88f5de0..e60a3ece699cd8 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -1064,9 +1064,8 @@ entry:
 define <vscale x 4 x float> @faddqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: faddqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fadd z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1083,9 +1082,8 @@ entry:
 define <vscale x 8 x half> @faddqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: faddqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fadd z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
@@ -1102,9 +1100,8 @@ entry:
 define <vscale x 4 x float> @fsubqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: fsubqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fsub z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1121,9 +1118,8 @@ entry:
 define <vscale x 8 x half> @fsubqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: fsubqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fsub z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
@@ -1140,9 +1136,8 @@ entry:
 define <vscale x 4 x float> @fmulqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: fmulqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1159,9 +1154,8 @@ entry:
 define <vscale x 8 x half> @fmulqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: fmulqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h

diff  --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index b1270165556e67..2a1bfc4168af4b 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -4,7 +4,6 @@
 define <vscale x  1 x i8> @select_nxv1i8(i1 %cond, <vscale x  1 x i8> %a, <vscale x  1 x i8> %b) {
 ; CHECK-LABEL: select_nxv1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
@@ -16,7 +15,6 @@ define <vscale x  1 x i8> @select_nxv1i8(i1 %cond, <vscale x  1 x i8> %a, <vscal
 define <vscale x  16 x i8> @select_nxv16i8(i1 %cond, <vscale x  16 x i8> %a, <vscale x  16 x i8> %b) {
 ; CHECK-LABEL: select_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
@@ -28,7 +26,6 @@ define <vscale x  16 x i8> @select_nxv16i8(i1 %cond, <vscale x  16 x i8> %a, <vs
 define <vscale x  1 x i16> @select_nxv1i16(i1 %cond, <vscale x  1 x i16> %a, <vscale x  1 x i16> %b) {
 ; CHECK-LABEL: select_nxv1i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -40,7 +37,6 @@ define <vscale x  1 x i16> @select_nxv1i16(i1 %cond, <vscale x  1 x i16> %a, <vs
 define <vscale x  8 x i16> @select_nxv8i16(i1 %cond, <vscale x  8 x i16> %a, <vscale x  8 x i16> %b) {
 ; CHECK-LABEL: select_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -52,7 +48,6 @@ define <vscale x  8 x i16> @select_nxv8i16(i1 %cond, <vscale x  8 x i16> %a, <vs
 define <vscale x  1 x i32> @select_nxv1i32(i1 %cond, <vscale x  1 x i32> %a, <vscale x  1 x i32> %b) {
 ; CHECK-LABEL: select_nxv1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -64,7 +59,6 @@ define <vscale x  1 x i32> @select_nxv1i32(i1 %cond, <vscale x  1 x i32> %a, <vs
 define <vscale x  4 x i32> @select_nxv4i32(i1 %cond, <vscale x  4 x i32> %a, <vscale x  4 x i32> %b) {
 ; CHECK-LABEL: select_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -76,7 +70,6 @@ define <vscale x  4 x i32> @select_nxv4i32(i1 %cond, <vscale x  4 x i32> %a, <vs
 define <vscale x  1 x i64> @select_nxv1i64(i1 %cond, <vscale x  1 x i64> %a, <vscale x  1 x i64> %b) {
 ; CHECK-LABEL: select_nxv1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -88,7 +81,6 @@ define <vscale x  1 x i64> @select_nxv1i64(i1 %cond, <vscale x  1 x i64> %a, <vs
 define <vscale x  2 x i64> @select_nxv2i64(i1 %cond, <vscale x  2 x i64> %a, <vscale x  2 x i64> %b) {
 ; CHECK-LABEL: select_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -100,7 +92,6 @@ define <vscale x  2 x i64> @select_nxv2i64(i1 %cond, <vscale x  2 x i64> %a, <vs
 define <vscale x  8 x half> @select_nxv8f16(i1 %cond, <vscale x  8 x half> %a, <vscale x  8 x half> %b) {
 ; CHECK-LABEL: select_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -112,7 +103,6 @@ define <vscale x  8 x half> @select_nxv8f16(i1 %cond, <vscale x  8 x half> %a, <
 define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a, <vscale x  4 x float> %b) {
 ; CHECK-LABEL: select_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -124,7 +114,6 @@ define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a,
 define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %a, <vscale x  2 x double> %b) {
 ; CHECK-LABEL: select_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -136,7 +125,6 @@ define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %
 define <vscale x  16 x i1> @select_nxv16i1(i1 %cond, <vscale x  16 x i1> %a, <vscale x  16 x i1> %b) {
 ; CHECK-LABEL: select_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.b, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -148,7 +136,6 @@ define <vscale x  16 x i1> @select_nxv16i1(i1 %cond, <vscale x  16 x i1> %a, <vs
 define <vscale x  8 x i1> @select_nxv8i1(i1 %cond, <vscale x  8 x i1> %a, <vscale x  8 x i1> %b) {
 ; CHECK-LABEL: select_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.h, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -160,7 +147,6 @@ define <vscale x  8 x i1> @select_nxv8i1(i1 %cond, <vscale x  8 x i1> %a, <vscal
 define <vscale x  4 x i1> @select_nxv4i1(i1 %cond, <vscale x  4 x i1> %a, <vscale x  4 x i1> %b) {
 ; CHECK-LABEL: select_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.s, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -172,7 +158,6 @@ define <vscale x  4 x i1> @select_nxv4i1(i1 %cond, <vscale x  4 x i1> %a, <vscal
 define <vscale x  2 x i1> @select_nxv2i1(i1 %cond, <vscale x  2 x i1> %a, <vscale x  2 x i1> %b) {
 ; CHECK-LABEL: select_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.d, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -184,7 +169,6 @@ define <vscale x  2 x i1> @select_nxv2i1(i1 %cond, <vscale x  2 x i1> %a, <vscal
 define <vscale x  1 x i1> @select_nxv1i1(i1 %cond, <vscale x  1 x i1> %a, <vscale x  1 x i1> %b) {
 ; CHECK-LABEL: select_nxv1i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.d, xzr, x8
 ; CHECK-NEXT:    punpklo p2.h, p2.b

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
index 76190eba870ded..9012f38a9742f7 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -9,7 +9,6 @@ define i32 @promote_extract_2i32_idx(<vscale x 2 x i32> %a, i32 %idx) {
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb x0, p0, z0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ext = extractelement <vscale x 2 x i32> %a, i32 %idx
   ret i32 %ext

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
index 696b6c34ef0415..7da0a25c357034 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
@@ -7,12 +7,10 @@ define double @fadda_nxv8f64(double %init, <vscale x 8 x double> %a) {
 ; CHECK-LABEL: fadda_nxv8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z2.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z3.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z4.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.nxv8f64(double %init, <vscale x 8 x double> %a)
   ret double %res
@@ -42,7 +40,6 @@ define double @fmaxv_nxv8f64(<vscale x 8 x double> %a) {
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z2.d
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmax.nxv8f64(<vscale x 8 x double> %a)
   ret double %res
@@ -56,7 +53,6 @@ define half @fminv_nxv16f16(<vscale x 16 x half> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv16f16(<vscale x 16 x half> %a)
   ret half %res
@@ -72,7 +68,6 @@ define double @fmaximumv_nxv8f64(<vscale x 8 x double> %a) {
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z2.d
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmaximum.nxv8f64(<vscale x 8 x double> %a)
   ret double %res
@@ -86,7 +81,6 @@ define half @fminimumv_nxv16f16(<vscale x 16 x half> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv16f16(<vscale x 16 x half> %a)
   ret half %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
index dd7b15ef5ee6f4..c6bc11a1d681ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
@@ -34,7 +34,6 @@ define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res
@@ -62,7 +61,6 @@ define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
   ret i16 %res
@@ -88,7 +86,6 @@ define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
   ret i16 %res
@@ -101,7 +98,6 @@ define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
   ret i16 %res
@@ -116,7 +112,6 @@ define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
   ret i32 %res
@@ -131,7 +126,6 @@ define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index ec94198a08ca7b..c9d73e3771aa05 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefix=CHECK-FRAMELAYOUT
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
@@ -168,7 +168,6 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    .cfi_offset w29, -24
 ; CHECK-NEXT:    .cfi_offset b8, -32
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x8, x0, #2, #32
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    add x8, x8, #15
@@ -602,3 +601,5 @@ entry:
   ret i32 %x
 }
 declare void @other()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-FRAMELAYOUT: {{.*}}

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index 478072d33d8c9b..63e20b55b10a40 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -10,10 +10,8 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi8:
@@ -36,10 +34,8 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI1_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi8:
@@ -68,10 +64,8 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi8:
@@ -112,13 +106,9 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 ; CHECK-LABEL: vls_sve_and_32xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_32xi8:
@@ -184,11 +174,9 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi16:
@@ -207,10 +195,8 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI5_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi16:
@@ -233,10 +219,8 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI6_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi16:
@@ -265,13 +249,9 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi16:
@@ -313,9 +293,7 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.s, #0, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi32:
@@ -334,10 +312,8 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI9_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi32:
@@ -358,13 +334,9 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi32:
@@ -390,9 +362,7 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi64:
@@ -411,12 +381,8 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z2.d, #0, #-1
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index bd49db8a4c4149..b6e732a7b9b114 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -12,12 +12,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctlz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sub z0.h, z0.h, #8 // =0x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i8:
@@ -51,9 +49,7 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctlz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i8:
@@ -103,9 +99,7 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctlz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v16i8:
@@ -339,12 +333,10 @@ define void @ctlz_v32i8(ptr %a) {
 define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctlz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sub z0.s, z0.s, #16 // =0x10
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i16:
@@ -369,9 +361,7 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctlz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i16:
@@ -405,9 +395,7 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctlz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i16:
@@ -546,9 +534,7 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctlz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i32:
@@ -571,9 +557,7 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctlz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i32:
@@ -646,9 +630,7 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v1i64:
@@ -669,9 +651,7 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i64:
@@ -732,11 +712,9 @@ define void @ctlz_v4i64(ptr %a) {
 define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctpop_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i8:
@@ -807,9 +785,7 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctpop_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i8:
@@ -932,9 +908,7 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctpop_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v16i8:
@@ -1602,11 +1576,9 @@ define void @ctpop_v32i8(ptr %a) {
 define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctpop_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i16:
@@ -1650,9 +1622,7 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctpop_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i16:
@@ -1723,9 +1693,7 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctpop_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i16:
@@ -2082,9 +2050,7 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctpop_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i32:
@@ -2128,9 +2094,7 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctpop_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i32:
@@ -2325,9 +2289,7 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctpop_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v1i64:
@@ -2359,9 +2321,7 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctpop_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i64:
@@ -2484,12 +2444,10 @@ define void @ctpop_v4i64(ptr %a) {
 define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: cttz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    orr z0.h, z0.h, #0x100
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i8:
@@ -2527,10 +2485,8 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i8:
@@ -2588,10 +2544,8 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v16i8:
@@ -2875,12 +2829,10 @@ define void @cttz_v32i8(ptr %a) {
 define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: cttz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    orr z0.s, z0.s, #0x10000
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i16:
@@ -2907,10 +2859,8 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i16:
@@ -2948,10 +2898,8 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i16:
@@ -3116,10 +3064,8 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i32:
@@ -3144,10 +3090,8 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i32:
@@ -3234,10 +3178,8 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v1i64:
@@ -3259,10 +3201,8 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index d3c446c9904b2b..4be262fd3f33fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -28,8 +28,6 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
 ; CHECK-NEXT:    and z2.d, z2.d, z5.d
 ; CHECK-NEXT:    orr z1.d, z3.d, z1.d
 ; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 9729a1d95cd916..fd043e3afe4a86 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -250,8 +250,6 @@ define void @build_vector_non_const_v4i1(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) {
 define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -273,8 +271,6 @@ define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -297,10 +293,6 @@ define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $z3
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z2.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z2.d
@@ -328,10 +320,6 @@ define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d
 define void @build_vector_non_const_v4f64(double %a, double %b, double %c, double %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $z3
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    stp q0, q2, [x0]
@@ -357,14 +345,6 @@ define void @build_vector_non_const_v4f64(double %a, double %b, double %c, doubl
 define void @build_vector_non_const_v8f16(half %a, half %b, half %c, half %d, half %e, half %f, half %g, half %h, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h6 killed $h6 def $z6
-; CHECK-NEXT:    // kill: def $h4 killed $h4 def $z4
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    // kill: def $h7 killed $h7 def $z7
-; CHECK-NEXT:    // kill: def $h5 killed $h5 def $z5
-; CHECK-NEXT:    // kill: def $h3 killed $h3 def $z3
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    zip1 z6.h, z6.h, z7.h
 ; CHECK-NEXT:    zip1 z4.h, z4.h, z5.h
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 619840fc6afb28..5ec63396a2f5f1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -12,8 +12,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-LABEL: concat_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z1.h[3]
 ; CHECK-NEXT:    mov z3.h, z1.h[2]
 ; CHECK-NEXT:    mov z4.h, z1.h[1]
@@ -27,7 +25,6 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i8:
@@ -61,11 +58,8 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2)  {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v16i8:
@@ -140,14 +134,11 @@ define void @concat_v64i8(ptr %a, ptr %b, ptr %c) {
 define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 ; CHECK-LABEL: concat_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    mov z3.s, z0.s[1]
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i16:
@@ -172,11 +163,8 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2)  {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i16:
@@ -245,10 +233,7 @@ define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
 define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 ; CHECK-LABEL: concat_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i32:
@@ -270,11 +255,8 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2)  {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i32:
@@ -340,11 +322,8 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2)  {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i64:
@@ -406,13 +385,27 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
 ;
 
 define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
-; CHECK-LABEL: concat_v4f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT:    ret
+; SVE2-LABEL: concat_v4f16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    cnth x8
+; SVE2-NEXT:    adrp x9, .LCPI15_0
+; SVE2-NEXT:    adrp x10, .LCPI15_1
+; SVE2-NEXT:    mov z2.h, w8
+; SVE2-NEXT:    ldr q3, [x9, :lo12:.LCPI15_0]
+; SVE2-NEXT:    ldr q4, [x10, :lo12:.LCPI15_1]
+; SVE2-NEXT:    ptrue p0.h, vl8
+; SVE2-NEXT:    mad z2.h, p0/m, z3.h, z4.h
+; SVE2-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
+; SVE2-NEXT:    ret
+;
+; SME-LABEL: concat_v4f16:
+; SME:       // %bb.0:
+; SME-NEXT:    mov z2.h, z1.h[1]
+; SME-NEXT:    mov z3.h, z0.h[1]
+; SME-NEXT:    zip1 z1.h, z1.h, z2.h
+; SME-NEXT:    zip1 z0.h, z0.h, z3.h
+; SME-NEXT:    zip1 z0.s, z0.s, z1.s
+; SME-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -432,11 +425,8 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2)  {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8f16:
@@ -505,10 +495,7 @@ define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
 define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 ; CHECK-LABEL: concat_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f32:
@@ -530,11 +517,8 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2)  {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f32:
@@ -600,11 +584,8 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2)  {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 7d6336a43a4fd1..8d009867232bf2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -10,7 +10,6 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v8i8i16:
@@ -48,7 +47,6 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v4i16i32:
@@ -76,7 +74,6 @@ define <2 x i64> @load_zext_v2i32i64(ptr %ap) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v2i32i64:
@@ -140,10 +137,6 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap)  {
 ; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x9]
 ; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x10]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
-; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
@@ -230,8 +223,6 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap)  {
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v8i16i32:
@@ -398,14 +389,6 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap)  {
 ; CHECK-NEXT:    ld1h { z5.d }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT:    ld1h { z6.d }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1h { z7.d }, p0/z, [x0, x10, lsl #1]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
-; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
-; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $z4
-; CHECK-NEXT:    // kill: def $q5 killed $q5 killed $z5
-; CHECK-NEXT:    // kill: def $q6 killed $q6 killed $z6
-; CHECK-NEXT:    // kill: def $q7 killed $q7 killed $z7
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v16i16i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827bbabc55..6adab37b84e35b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -10,7 +10,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-LABEL: extract_subvector_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -18,7 +17,6 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i1:
@@ -45,7 +43,6 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -53,7 +50,6 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i8:
@@ -78,9 +74,7 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v16i8:
@@ -117,10 +111,8 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i16:
@@ -138,9 +130,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i16:
@@ -177,9 +167,7 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) {
 define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i32:
@@ -198,9 +186,7 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i32:
@@ -237,9 +223,7 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i64:
@@ -276,9 +260,7 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
 define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f16:
@@ -297,9 +279,7 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8f16:
@@ -336,9 +316,7 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) {
 define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f32:
@@ -357,9 +335,7 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f32:
@@ -396,9 +372,7 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
 define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index cf308e6c4395ff..71a0052f22164e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -12,9 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @extractelement_v2f16(<2 x half> %op1) {
 ; CHECK-LABEL: extractelement_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f16:
@@ -32,9 +30,7 @@ define half @extractelement_v2f16(<2 x half> %op1) {
 define half @extractelement_v4f16(<4 x half> %op1) {
 ; CHECK-LABEL: extractelement_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f16:
@@ -52,9 +48,7 @@ define half @extractelement_v4f16(<4 x half> %op1) {
 define half @extractelement_v8f16(<8 x half> %op1) {
 ; CHECK-LABEL: extractelement_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f16:
@@ -73,7 +67,6 @@ define half @extractelement_v16f16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v16f16:
@@ -92,9 +85,7 @@ define half @extractelement_v16f16(ptr %a) {
 define float @extractelement_v2f32(<2 x float> %op1) {
 ; CHECK-LABEL: extractelement_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f32:
@@ -112,9 +103,7 @@ define float @extractelement_v2f32(<2 x float> %op1) {
 define float @extractelement_v4f32(<4 x float> %op1) {
 ; CHECK-LABEL: extractelement_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f32:
@@ -133,7 +122,6 @@ define float @extractelement_v8f32(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f32:
@@ -163,9 +151,7 @@ define double @extractelement_v1f64(<1 x double> %op1) {
 define double @extractelement_v2f64(<2 x double> %op1) {
 ; CHECK-LABEL: extractelement_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f64:
@@ -184,7 +170,6 @@ define double @extractelement_v4f64(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 2282e74af5d006..72982d175901d8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -847,10 +847,10 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
 ; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
 ; SVE2-NEXT:    ptrue p0.s, vl2
-; SVE2-NEXT:    uzp1 z3.s, z0.s, z0.s
-; SVE2-NEXT:    uzp1 z2.s, z1.s, z1.s
+; SVE2-NEXT:    uzp1 z2.s, z0.s, z0.s
+; SVE2-NEXT:    uzp1 z1.s, z1.s, z1.s
+; SVE2-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; SVE2-NEXT:    mov z1.s, #0x7fffffff
-; SVE2-NEXT:    splice z0.s, p0, { z2.s, z3.s }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]
@@ -1242,10 +1242,10 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
 ; SVE2-NEXT:    fcvt z1.h, p0/m, z1.s
 ; SVE2-NEXT:    ptrue p0.h, vl4
-; SVE2-NEXT:    uzp1 z3.h, z0.h, z0.h
-; SVE2-NEXT:    uzp1 z2.h, z1.h, z1.h
+; SVE2-NEXT:    uzp1 z2.h, z0.h, z0.h
+; SVE2-NEXT:    uzp1 z1.h, z1.h, z1.h
+; SVE2-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
-; SVE2-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index f2c882c370eabe..b3f8f9d0764947 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -13,10 +13,7 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fadd_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f16:
@@ -63,10 +60,7 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fadd_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f16:
@@ -113,10 +107,7 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fadd_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v8f16:
@@ -333,10 +324,7 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fadd_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f32:
@@ -361,10 +349,7 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f32:
@@ -449,10 +434,7 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f64:
@@ -523,10 +505,7 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fdiv_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f16:
@@ -573,10 +552,7 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fdiv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f16:
@@ -623,10 +599,7 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fdiv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v8f16:
@@ -843,10 +816,7 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fdiv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f32:
@@ -871,10 +841,7 @@ define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f32:
@@ -959,10 +926,7 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f64:
@@ -1033,11 +997,7 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
 ; CHECK-LABEL: fma_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f16:
@@ -1093,11 +1053,7 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f16:
@@ -1153,11 +1109,7 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v8f16:
@@ -1428,11 +1380,7 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f32:
@@ -1459,11 +1407,7 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f32:
@@ -1560,11 +1504,7 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f64:
@@ -1644,10 +1584,7 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fmul_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f16:
@@ -1694,10 +1631,7 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmul_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f16:
@@ -1744,10 +1678,7 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmul_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v8f16:
@@ -1964,10 +1895,7 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmul_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f32:
@@ -1992,10 +1920,7 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f32:
@@ -2080,10 +2005,7 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f64:
@@ -2154,9 +2076,7 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fneg_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f16:
@@ -2194,9 +2114,7 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fneg_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f16:
@@ -2234,9 +2152,7 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fneg_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v8f16:
@@ -2399,9 +2315,7 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fneg_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f32:
@@ -2424,9 +2338,7 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fneg_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f32:
@@ -2499,9 +2411,7 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fneg_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f64:
@@ -2563,9 +2473,7 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fsqrt_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f16:
@@ -2603,9 +2511,7 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fsqrt_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f16:
@@ -2643,9 +2549,7 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fsqrt_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v8f16:
@@ -2808,9 +2712,7 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fsqrt_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f32:
@@ -2833,9 +2735,7 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fsqrt_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f32:
@@ -2908,9 +2808,7 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fsqrt_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f64:
@@ -2972,10 +2870,7 @@ define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fsub_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f16:
@@ -3022,10 +2917,7 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fsub_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f16:
@@ -3072,10 +2964,7 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fsub_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v8f16:
@@ -3292,10 +3181,7 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fsub_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f32:
@@ -3320,10 +3206,7 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f32:
@@ -3408,10 +3291,7 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f64:
@@ -3482,9 +3362,7 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fabs_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f16:
@@ -3522,9 +3400,7 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fabs_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f16:
@@ -3562,9 +3438,7 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fabs_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v8f16:
@@ -3727,9 +3601,7 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f32:
@@ -3752,9 +3624,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fabs_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f32:
@@ -3827,9 +3697,7 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fabs_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index 200ffb60a7928a..3962f288fa720b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -13,12 +13,9 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16:
@@ -51,11 +48,8 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16:
@@ -103,11 +97,8 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16:
@@ -327,11 +318,8 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32:
@@ -358,11 +346,8 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32:
@@ -458,7 +443,6 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csetm x8, eq
 ; CHECK-NEXT:    mov z0.d, x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64:
@@ -480,11 +464,8 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index c96189b9602685..6e0d169779e0c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -12,9 +12,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f16_to_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -40,9 +39,8 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v4f16_to_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -74,10 +72,9 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v8f16_to_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
@@ -122,21 +119,19 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v16f16_to_v16f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
 ; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT:    stp q3, q0, [x0]
 ; CHECK-NEXT:    stp q2, q1, [x0, #32]
+; CHECK-NEXT:    stp q3, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
@@ -1134,7 +1129,6 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v1f64_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -1153,7 +1147,6 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index 680cb4fb0a7910..88d8ac0fbbd663 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -12,11 +12,7 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f16:
@@ -84,11 +80,7 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v8f16:
@@ -451,11 +443,7 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f32:
@@ -483,11 +471,7 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f32:
@@ -606,11 +590,7 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 84aea185917fae..61c25145df31ad 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -13,10 +13,7 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f16:
@@ -63,10 +60,7 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v8f16:
@@ -283,10 +277,7 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f32:
@@ -311,10 +302,7 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f32:
@@ -418,10 +406,7 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f64:
@@ -492,10 +477,7 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fminnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f16:
@@ -542,10 +524,7 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fminnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v8f16:
@@ -762,10 +741,7 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fminnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f32:
@@ -790,10 +766,7 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fminnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f32:
@@ -897,10 +870,7 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fminnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f64:
@@ -971,10 +941,7 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmax_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f16:
@@ -1021,10 +988,7 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmax_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v8f16:
@@ -1241,10 +1205,7 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmax_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f32:
@@ -1269,10 +1230,7 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmax_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f32:
@@ -1376,10 +1334,7 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmax_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f64:
@@ -1450,10 +1405,7 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmin_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f16:
@@ -1500,10 +1452,7 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmin_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v8f16:
@@ -1720,10 +1669,7 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmin_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f32:
@@ -1748,10 +1694,7 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmin_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f32:
@@ -1855,10 +1798,7 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmin_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
index f081d4ac65b279..0c5bfe8be1dab7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
@@ -10,15 +10,11 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
 ; FA64-LABEL: fadda_v4f16:
 ; FA64:       // %bb.0:
 ; FA64-NEXT:    ptrue p0.h, vl4
-; FA64-NEXT:    // kill: def $h0 killed $h0 def $z0
-; FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; FA64-NEXT:    fadda h0, p0, h0, z1.h
-; FA64-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; FA64-NEXT:    ret
 ;
 ; NO-FA64-LABEL: fadda_v4f16:
 ; NO-FA64:       // %bb.0:
-; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; NO-FA64-NEXT:    fadd h0, h0, h1
 ; NO-FA64-NEXT:    mov z2.h, z1.h[1]
 ; NO-FA64-NEXT:    fadd h0, h0, h2

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
index 4eaaee7ce5055d..8a53607b8923b6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
@@ -11,7 +11,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @fadda_v4f16(half %start, <4 x half> %a) {
 ; CHECK-LABEL: fadda_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fadd h0, h0, h2
@@ -55,7 +54,6 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
 define half @fadda_v8f16(half %start, <8 x half> %a) {
 ; CHECK-LABEL: fadda_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fadd h0, h0, h2
@@ -258,7 +256,6 @@ define half @fadda_v16f16(half %start, ptr %a) {
 define float @fadda_v2f32(float %start, <2 x float> %a) {
 ; CHECK-LABEL: fadda_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    mov z1.s, z1.s[1]
 ; CHECK-NEXT:    fadd s0, s0, s1
@@ -281,7 +278,6 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
 define float @fadda_v4f32(float %start, <4 x float> %a) {
 ; CHECK-LABEL: fadda_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    fadd s0, s0, s2
@@ -372,7 +368,6 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
 define double @fadda_v2f64(double %start, <2 x double> %a) {
 ; CHECK-LABEL: fadda_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    mov z1.d, z1.d[1]
 ; CHECK-NEXT:    fadd d0, d0, d1
@@ -432,7 +427,6 @@ define half @faddv_v4f16(half %start, <4 x half> %a) {
 ; CHECK-LABEL: faddv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -472,7 +466,6 @@ define half @faddv_v8f16(half %start, <8 x half> %a) {
 ; CHECK-LABEL: faddv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -633,7 +626,6 @@ define float @faddv_v2f32(float %start, <2 x float> %a) {
 ; CHECK-LABEL: faddv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -656,7 +648,6 @@ define float @faddv_v4f32(float %start, <4 x float> %a) {
 ; CHECK-LABEL: faddv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -728,7 +719,6 @@ define double @faddv_v2f64(double %start, <2 x double> %a) {
 ; CHECK-LABEL: faddv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv d1, p0, z1.d
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    ret
@@ -780,9 +770,7 @@ define half @fmaxv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fmaxv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f16:
@@ -816,9 +804,7 @@ define half @fmaxv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fmaxv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v8f16:
@@ -874,7 +860,6 @@ define half @fmaxv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v16f16:
@@ -969,9 +954,7 @@ define float @fmaxv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fmaxv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v2f32:
@@ -991,9 +974,7 @@ define float @fmaxv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fmaxv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f32:
@@ -1018,7 +999,6 @@ define float @fmaxv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v8f32:
@@ -1060,9 +1040,7 @@ define double @fmaxv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fmaxv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v2f64:
@@ -1083,7 +1061,6 @@ define double @fmaxv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f64:
@@ -1110,9 +1087,7 @@ define half @fminv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fminv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f16:
@@ -1146,9 +1121,7 @@ define half @fminv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fminv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v8f16:
@@ -1204,7 +1177,6 @@ define half @fminv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v16f16:
@@ -1299,9 +1271,7 @@ define float @fminv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fminv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v2f32:
@@ -1321,9 +1291,7 @@ define float @fminv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fminv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f32:
@@ -1348,7 +1316,6 @@ define float @fminv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v8f32:
@@ -1390,9 +1357,7 @@ define double @fminv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fminv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v2f64:
@@ -1413,7 +1378,6 @@ define double @fminv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f64:
@@ -1440,9 +1404,7 @@ define half @fmaximumv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fmaximumv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f16:
@@ -1476,9 +1438,7 @@ define half @fmaximumv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fmaximumv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v8f16:
@@ -1534,7 +1494,6 @@ define half @fmaximumv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v16f16:
@@ -1629,9 +1588,7 @@ define float @fmaximumv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fmaximumv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v2f32:
@@ -1651,9 +1608,7 @@ define float @fmaximumv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fmaximumv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f32:
@@ -1678,7 +1633,6 @@ define float @fmaximumv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v8f32:
@@ -1720,9 +1674,7 @@ define double @fmaximumv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fmaximumv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v2f64:
@@ -1743,7 +1695,6 @@ define double @fmaximumv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f64:
@@ -1770,9 +1721,7 @@ define half @fminimumv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fminimumv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f16:
@@ -1806,9 +1755,7 @@ define half @fminimumv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fminimumv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v8f16:
@@ -1864,7 +1811,6 @@ define half @fminimumv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v16f16:
@@ -1959,9 +1905,7 @@ define float @fminimumv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fminimumv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v2f32:
@@ -1981,9 +1925,7 @@ define float @fminimumv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fminimumv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f32:
@@ -2008,7 +1950,6 @@ define float @fminimumv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v8f32:
@@ -2050,9 +1991,7 @@ define double @fminimumv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fminimumv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v2f64:
@@ -2073,7 +2012,6 @@ define double @fminimumv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fminv d0, p0, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 03bc39a6ef3ee1..70e4b02e9de6cd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -13,9 +13,7 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintp_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f16:
@@ -53,9 +51,7 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintp_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f16:
@@ -93,9 +89,7 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintp_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v8f16:
@@ -258,9 +252,7 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintp_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f32:
@@ -283,9 +275,7 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintp_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f32:
@@ -377,9 +367,7 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintp_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f64:
@@ -441,9 +429,7 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintm_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f16:
@@ -481,9 +467,7 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f16:
@@ -521,9 +505,7 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v8f16:
@@ -686,9 +668,7 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f32:
@@ -711,9 +691,7 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f32:
@@ -805,9 +783,7 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f64:
@@ -869,9 +845,7 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinti_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f16:
@@ -909,9 +883,7 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinti_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f16:
@@ -949,9 +921,7 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinti_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v8f16:
@@ -1114,9 +1084,7 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinti_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f32:
@@ -1139,9 +1107,7 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinti_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f32:
@@ -1233,9 +1199,7 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinti_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f64:
@@ -1297,9 +1261,7 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintx_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f16:
@@ -1337,9 +1299,7 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintx_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f16:
@@ -1377,9 +1337,7 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintx_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v8f16:
@@ -1542,9 +1500,7 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintx_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f32:
@@ -1567,9 +1523,7 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintx_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f32:
@@ -1661,9 +1615,7 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintx_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f64:
@@ -1725,9 +1677,7 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinta_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f16:
@@ -1765,9 +1715,7 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinta_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f16:
@@ -1805,9 +1753,7 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinta_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v8f16:
@@ -1970,9 +1916,7 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinta_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f32:
@@ -1995,9 +1939,7 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinta_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f32:
@@ -2089,9 +2031,7 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinta_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f64:
@@ -2153,9 +2093,7 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintn_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f16:
@@ -2193,9 +2131,7 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintn_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f16:
@@ -2233,9 +2169,7 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintn_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v8f16:
@@ -2398,9 +2332,7 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintn_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f32:
@@ -2423,9 +2355,7 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintn_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f32:
@@ -2517,9 +2447,7 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintn_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f64:
@@ -2581,9 +2509,7 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintz_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f16:
@@ -2621,9 +2547,7 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintz_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f16:
@@ -2661,9 +2585,7 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintz_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v8f16:
@@ -2826,9 +2748,7 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintz_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f32:
@@ -2851,9 +2771,7 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintz_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f32:
@@ -2945,9 +2863,7 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintz_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index bcc446d9d1a416..b2d5bf1ed8b1fa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -10,12 +10,9 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -52,12 +49,9 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -94,12 +88,9 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -254,12 +245,9 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -286,12 +274,9 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -405,15 +390,11 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:
@@ -437,7 +418,6 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
 define void @select_v4f64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 11fee267660c03..8b0ecfe64daba9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -12,9 +12,7 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f16_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i16:
@@ -196,11 +194,9 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i32:
@@ -224,11 +220,9 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) {
 define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i32:
@@ -439,14 +433,12 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
 define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
 ; CHECK-NEXT:    fcvtzu x8, h0
 ; CHECK-NEXT:    fcvtzu x9, h1
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64:
@@ -757,9 +749,7 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i16:
@@ -781,10 +771,8 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f32_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i16:
@@ -819,7 +807,6 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i16:
@@ -942,9 +929,7 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i32:
@@ -966,9 +951,7 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f32_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i32:
@@ -1038,11 +1021,9 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f32_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f32_v1i64:
@@ -1062,11 +1043,9 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
 define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i64:
@@ -1188,7 +1167,6 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, d0
 ; CHECK-NEXT:    mov z0.h, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i16:
@@ -1208,10 +1186,8 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i16:
@@ -1243,7 +1219,6 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16:
@@ -1300,7 +1275,6 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
 ; CHECK-NEXT:    zip1 z1.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i16:
@@ -1504,10 +1478,8 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i32:
@@ -1527,10 +1499,8 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i32:
@@ -1559,7 +1529,6 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i32:
@@ -1646,9 +1615,7 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i64:
@@ -1668,9 +1635,7 @@ define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i64:
@@ -1729,9 +1694,7 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f16_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i16:
@@ -1913,11 +1876,9 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i32:
@@ -1941,11 +1902,9 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) {
 define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i32:
@@ -2157,14 +2116,12 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
 define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
 ; CHECK-NEXT:    fcvtzs x8, h0
 ; CHECK-NEXT:    fcvtzs x9, h1
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64:
@@ -2475,9 +2432,7 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i16:
@@ -2499,10 +2454,8 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i16:
@@ -2537,7 +2490,6 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i16:
@@ -2660,9 +2612,7 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i32:
@@ -2684,9 +2634,7 @@ define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i32:
@@ -2756,11 +2704,9 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f32_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f32_v1i64:
@@ -2780,11 +2726,9 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
 define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i64:
@@ -2908,7 +2852,6 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, d0
 ; CHECK-NEXT:    mov z0.h, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i16:
@@ -2928,10 +2871,8 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i16:
@@ -2963,7 +2904,6 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16:
@@ -3020,7 +2960,6 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
 ; CHECK-NEXT:    zip1 z1.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16:
@@ -3224,10 +3163,8 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i32:
@@ -3247,10 +3184,8 @@ define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i32:
@@ -3279,7 +3214,6 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i32:
@@ -3366,9 +3300,7 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i64:
@@ -3388,9 +3320,7 @@ define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index ad5f91a5f39a49..73b77c6dde4b23 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -8,18 +8,14 @@ target triple = "aarch64-unknown-linux-gnu"
 define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    mov z3.s, z2.s[1]
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -53,16 +49,12 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
 define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -109,17 +101,13 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
 define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -338,16 +326,12 @@ define void @select_v16f16(ptr %a, ptr %b) {
 define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -376,17 +360,13 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
 define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -513,17 +493,13 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 275d13ebfd9491..ae420059139d01 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -16,11 +16,9 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i8:
@@ -52,11 +50,9 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) {
 ; CHECK-NEXT:    index z1.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z2.b, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
 ; CHECK-NEXT:    mov z0.b, p0/m, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i8:
@@ -90,11 +86,9 @@ define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) {
 ; CHECK-NEXT:    index z1.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z2.b, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
 ; CHECK-NEXT:    mov z0.b, p0/m, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16i8:
@@ -130,11 +124,9 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) {
 ; CHECK-NEXT:    index z2.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z3.b, w8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT:    mov z1.b, p0/m, w8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v32i8:
@@ -171,11 +163,9 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i16:
@@ -204,11 +194,9 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i16:
@@ -240,11 +228,9 @@ define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i16:
@@ -278,11 +264,9 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) {
 ; CHECK-NEXT:    index z2.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z3.h, w8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT:    mov z1.h, p0/m, w8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16i16:
@@ -317,11 +301,9 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i32:
@@ -350,11 +332,9 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i32:
@@ -389,7 +369,6 @@ define <8 x i32> @insertelement_v8i32(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.s, p0/m, w8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i32:
@@ -419,7 +398,6 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: insertelement_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #5 // =0x5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v1i64:
@@ -442,11 +420,9 @@ define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) {
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, x8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i64:
@@ -479,7 +455,6 @@ define <4 x i64> @insertelement_v4i64(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.d, p0/m, x8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i64:
@@ -507,9 +482,7 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) {
 ; CHECK-LABEL: insertelement_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov h1, #5.00000000
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f16:
@@ -535,11 +508,9 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    fmov h1, #5.00000000
 ; CHECK-NEXT:    mov z0.h, p0/m, h1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f16:
@@ -572,11 +543,9 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    fmov h1, #5.00000000
 ; CHECK-NEXT:    mov z0.h, p0/m, h1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8f16:
@@ -615,7 +584,6 @@ define <16 x half> @insertelement_v16f16(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.h, p0/m, h2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16f16:
@@ -652,11 +620,9 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    fmov s1, #5.00000000
 ; CHECK-NEXT:    mov z0.s, p0/m, s1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f32:
@@ -685,11 +651,9 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    fmov s1, #5.00000000
 ; CHECK-NEXT:    mov z0.s, p0/m, s1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f32:
@@ -724,7 +688,6 @@ define <8 x float> @insertelement_v8f32(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.s, p0/m, s2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8f32:
@@ -777,11 +740,9 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) {
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
 ; CHECK-NEXT:    fmov d1, #5.00000000
 ; CHECK-NEXT:    mov z0.d, p0/m, d1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f64:
@@ -814,7 +775,6 @@ define <4 x double> @insertelement_v4f64(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.d, p0/m, d2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 4360f3a12014a4..95bf887e48913d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: add_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i8:
@@ -49,10 +46,7 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: add_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i8:
@@ -102,10 +96,7 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: add_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v16i8:
@@ -343,10 +334,7 @@ define void @add_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: add_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i16:
@@ -371,10 +359,7 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: add_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i16:
@@ -408,10 +393,7 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: add_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i16:
@@ -553,10 +535,7 @@ define void @add_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: add_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i32:
@@ -581,10 +560,7 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: add_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i32:
@@ -672,10 +648,7 @@ define void @add_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: add_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v1i64:
@@ -696,10 +669,7 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: add_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i64:
@@ -771,18 +741,12 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: mul_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i8:
@@ -817,18 +781,12 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: mul_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i8:
@@ -879,18 +837,12 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: mul_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v16i8:
@@ -1140,18 +1092,12 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: mul_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i16:
@@ -1176,18 +1122,12 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: mul_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i16:
@@ -1222,18 +1162,12 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: mul_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i16:
@@ -1387,18 +1321,12 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: mul_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i32:
@@ -1423,18 +1351,12 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: mul_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i32:
@@ -1528,18 +1450,12 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: mul_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v1i64:
@@ -1561,18 +1477,12 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: mul_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:
@@ -1651,10 +1561,7 @@ define void @mul_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: sub_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i8:
@@ -1688,10 +1595,7 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sub_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i8:
@@ -1741,10 +1645,7 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sub_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v16i8:
@@ -1982,10 +1883,7 @@ define void @sub_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sub_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i16:
@@ -2010,10 +1908,7 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sub_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i16:
@@ -2047,10 +1942,7 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sub_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i16:
@@ -2192,10 +2084,7 @@ define void @sub_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sub_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i32:
@@ -2220,10 +2109,7 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sub_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i32:
@@ -2311,10 +2197,7 @@ define void @sub_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sub_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v1i64:
@@ -2335,10 +2218,7 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sub_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i64:
@@ -2410,10 +2290,8 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: abs_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i8:
@@ -2447,9 +2325,7 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: abs_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i8:
@@ -2499,9 +2375,7 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: abs_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v16i8:
@@ -2736,10 +2610,8 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: abs_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i16:
@@ -2764,9 +2636,7 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: abs_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i16:
@@ -2800,9 +2670,7 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: abs_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i16:
@@ -2941,9 +2809,7 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: abs_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i32:
@@ -2968,9 +2834,7 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: abs_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i32:
@@ -3055,9 +2919,7 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: abs_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v1i64:
@@ -3079,9 +2941,7 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: abs_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index ba20de65a253a1..843d146051fff0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -13,11 +13,8 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i8:
@@ -77,11 +74,8 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v16i8:
@@ -373,11 +367,8 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i16:
@@ -417,11 +408,8 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i16:
@@ -593,11 +581,8 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i32:
@@ -625,11 +610,8 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i32:
@@ -729,11 +711,8 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v1i64:
@@ -757,11 +736,8 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 1fdcd4f8268708..6b5258ea35fb02 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -10,11 +10,10 @@ target triple = "aarch64-unknown-linux-gnu"
 ;
 
 define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
+;
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -22,7 +21,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -56,11 +54,9 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -74,7 +70,6 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -124,18 +119,16 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z4.s, z2.h
 ; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -148,19 +141,18 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -266,6 +258,7 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z4.h, z2.b
 ; CHECK-NEXT:    sunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    sunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -274,6 +267,7 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    sunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    sunpklo z5.h, z5.b
 ; CHECK-NEXT:    sunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -282,34 +276,32 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    sunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    sunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v32i8:
@@ -463,12 +455,9 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -493,14 +482,11 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -534,13 +520,11 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
@@ -549,7 +533,6 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -617,14 +600,14 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    sdiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -715,10 +698,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -743,10 +723,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -831,10 +808,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -856,10 +830,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:
@@ -927,16 +898,13 @@ define void @sdiv_v4i64(ptr %a, ptr %b)  {
 define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: udiv_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i8:
@@ -970,11 +938,9 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: udiv_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -988,7 +954,6 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i8:
@@ -1038,18 +1003,16 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z4.s, z2.h
 ; CHECK-NEXT:    uunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1062,19 +1025,18 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v16i8:
@@ -1180,6 +1142,7 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z4.h, z2.b
 ; CHECK-NEXT:    uunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1188,6 +1151,7 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    uunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uunpklo z5.h, z5.b
 ; CHECK-NEXT:    uunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -1196,34 +1160,32 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    uunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    uunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v32i8:
@@ -1376,13 +1338,10 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: udiv_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i16:
@@ -1407,14 +1366,11 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: udiv_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i16:
@@ -1448,13 +1404,11 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
@@ -1463,7 +1417,6 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i16:
@@ -1531,14 +1484,14 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    udiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -1629,10 +1582,7 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: udiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i32:
@@ -1657,10 +1607,7 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i32:
@@ -1745,10 +1692,7 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: udiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v1i64:
@@ -1770,10 +1714,7 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i64:
@@ -1905,14 +1846,14 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
-; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
+; NONEON-NOSVE-NEXT:    lsr w10, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp]
 ; NONEON-NOSVE-NEXT:    umull x8, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w8
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w8, w8, #6
-; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #64

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 25a6ea490c163d..1d35e9376cc682 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -16,7 +16,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i1_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -90,7 +89,6 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i3_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -133,7 +131,6 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
@@ -421,7 +418,6 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -476,15 +472,14 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -891,9 +886,8 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -927,7 +921,6 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -1004,31 +997,30 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z6.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z5.d, z3.s
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    sunpklo z6.d, z1.s
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z2.d, z2.s
-; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z7.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q6, q1, [x0, #32]
 ; CHECK-NEXT:    stp q5, q3, [x0, #64]
+; CHECK-NEXT:    stp q6, q1, [x0, #32]
 ; CHECK-NEXT:    stp q7, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -1585,7 +1577,6 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
@@ -1737,7 +1728,6 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -1774,15 +1764,14 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z2.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z3.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -1994,7 +1983,6 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
@@ -2090,7 +2078,6 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
@@ -2378,7 +2365,6 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -2433,15 +2419,14 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -2848,7 +2833,6 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
@@ -2888,7 +2872,6 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -2969,31 +2952,30 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z6.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z5.d, z3.s
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uunpklo z6.d, z1.s
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z7.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q6, q1, [x0, #32]
 ; CHECK-NEXT:    stp q5, q3, [x0, #64]
+; CHECK-NEXT:    stp q6, q1, [x0, #32]
 ; CHECK-NEXT:    stp q7, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -3603,7 +3585,6 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
@@ -3755,7 +3736,6 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -3794,15 +3774,14 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z2.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z3.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -4038,7 +4017,6 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -4138,7 +4116,6 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    mov z1.s, w0
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    uunpklo z1.d, z1.s
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
 ; SVE-NEXT:    str q0, [x1]
@@ -4147,7 +4124,6 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE2-LABEL: extend_and_mul:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z1.s, w0
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    uunpklo z1.d, z1.s
 ; SVE2-NEXT:    mul z0.d, z1.d, z0.d
 ; SVE2-NEXT:    str q0, [x1]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 687dd9445f387b..566621f4d1635d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: and_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i8:
@@ -65,10 +62,7 @@ define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: and_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v16i8:
@@ -306,10 +300,7 @@ define void @and_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: and_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i16:
@@ -343,10 +334,7 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: and_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i16:
@@ -488,10 +476,7 @@ define void @and_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: and_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i32:
@@ -516,10 +501,7 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: and_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i32:
@@ -607,10 +589,7 @@ define void @and_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: and_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v1i64:
@@ -631,10 +610,7 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: and_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i64:
@@ -705,10 +681,7 @@ define void @and_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: or_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i8:
@@ -758,10 +731,7 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: or_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v16i8:
@@ -999,10 +969,7 @@ define void @or_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: or_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i16:
@@ -1036,10 +1003,7 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: or_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i16:
@@ -1181,10 +1145,7 @@ define void @or_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: or_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i32:
@@ -1209,10 +1170,7 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: or_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i32:
@@ -1300,10 +1258,7 @@ define void @or_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: or_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v1i64:
@@ -1324,10 +1279,7 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: or_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i64:
@@ -1398,10 +1350,7 @@ define void @or_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: xor_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i8:
@@ -1451,10 +1400,7 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: xor_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v16i8:
@@ -1692,10 +1638,7 @@ define void @xor_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: xor_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i16:
@@ -1729,10 +1672,7 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: xor_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i16:
@@ -1874,10 +1814,7 @@ define void @xor_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: xor_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i32:
@@ -1902,10 +1839,7 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: xor_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i32:
@@ -1993,10 +1927,7 @@ define void @xor_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: xor_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v1i64:
@@ -2017,10 +1948,7 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: xor_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 1bca7dd09d9b7a..1aaec95cd9624c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -13,10 +13,7 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i8:
@@ -75,10 +72,7 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v16i8:
@@ -367,10 +361,7 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i16:
@@ -409,10 +400,7 @@ define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i16:
@@ -581,10 +569,7 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i32:
@@ -611,10 +596,7 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i32:
@@ -712,10 +694,7 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v1i64:
@@ -739,10 +718,7 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i64:
@@ -819,10 +795,7 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i8:
@@ -881,10 +854,7 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v16i8:
@@ -1173,10 +1143,7 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i16:
@@ -1215,10 +1182,7 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i16:
@@ -1387,10 +1351,7 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i32:
@@ -1417,10 +1378,7 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i32:
@@ -1518,10 +1476,7 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v1i64:
@@ -1545,10 +1500,7 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i64:
@@ -1625,10 +1577,7 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i8:
@@ -1687,10 +1636,7 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v16i8:
@@ -1979,10 +1925,7 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i16:
@@ -2021,10 +1964,7 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i16:
@@ -2193,10 +2133,7 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i32:
@@ -2223,10 +2160,7 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i32:
@@ -2324,10 +2258,7 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v1i64:
@@ -2351,10 +2282,7 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i64:
@@ -2431,10 +2359,7 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i8:
@@ -2493,10 +2418,7 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v16i8:
@@ -2785,10 +2707,7 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i16:
@@ -2827,10 +2746,7 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i16:
@@ -2999,10 +2915,7 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i32:
@@ -3029,10 +2942,7 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i32:
@@ -3130,10 +3040,7 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v1i64:
@@ -3157,10 +3064,7 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
index 94d5bb1543b0e0..793c5ad625930a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
@@ -15,11 +15,7 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
 ; NO-FA64-LABEL: mla8xi8:
 ; NO-FA64:       // %bb.0:
 ; NO-FA64-NEXT:    ptrue p0.b, vl8
-; NO-FA64-NEXT:    // kill: def $d0 killed $d0 def $z0
-; NO-FA64-NEXT:    // kill: def $d2 killed $d2 def $z2
-; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; NO-FA64-NEXT:    mad z0.b, p0/m, z1.b, z2.b
-; NO-FA64-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; NO-FA64-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mla8xi8:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index 319fa5c8458275..007262750c56b8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index b0fdce9a93bd3b..269d684ff22027 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -17,25 +17,19 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: smulh_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i8:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.h, vl4
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i8:
@@ -80,18 +74,12 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: smulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i8:
@@ -156,18 +144,12 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: smulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v16i8:
@@ -575,25 +557,19 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: smulh_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.s, vl2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i16:
@@ -625,18 +601,12 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: smulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i16:
@@ -679,18 +649,12 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: smulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i16:
@@ -910,18 +874,12 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: smulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i32:
@@ -951,18 +909,12 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: smulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i32:
@@ -1083,18 +1035,12 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: smulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v1i64:
@@ -1122,18 +1068,12 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: smulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i64:
@@ -1223,25 +1163,19 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: umulh_v4i8:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    and z0.h, z0.h, #0xff
 ; SVE-NEXT:    and z1.h, z1.h, #0xff
+; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.h, z0.h, #0xff
 ; SVE2-NEXT:    and z1.h, z1.h, #0xff
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i8:
@@ -1284,18 +1218,12 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: umulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i8:
@@ -1358,18 +1286,12 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: umulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v16i8:
@@ -1776,25 +1698,19 @@ define void @umulh_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: umulh_v2i16:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE-NEXT:    and z1.s, z1.s, #0xffff
+; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE2-NEXT:    and z1.s, z1.s, #0xffff
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i16:
@@ -1826,18 +1742,12 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: umulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i16:
@@ -1880,18 +1790,12 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: umulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i16:
@@ -2111,18 +2015,12 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: umulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i32:
@@ -2152,18 +2050,12 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: umulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i32:
@@ -2286,18 +2178,12 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: umulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v1i64:
@@ -2323,18 +2209,12 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: umulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 92a67cba55f7a1..6a01b342850e97 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -13,10 +13,8 @@ define i8 @uaddv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uaddv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i8:
@@ -49,10 +47,8 @@ define i8 @uaddv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uaddv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i8:
@@ -104,7 +100,6 @@ define i8 @uaddv_v32i8(ptr %a) {
 ; CHECK-NEXT:    add z0.b, z1.b, z0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v32i8:
@@ -186,10 +181,8 @@ define i16 @uaddv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uaddv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i16:
@@ -214,10 +207,8 @@ define i16 @uaddv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uaddv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i16:
@@ -253,7 +244,6 @@ define i16 @uaddv_v16i16(ptr %a) {
 ; CHECK-NEXT:    add z0.h, z1.h, z0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i16:
@@ -303,10 +293,8 @@ define i32 @uaddv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uaddv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v2i32:
@@ -326,10 +314,8 @@ define i32 @uaddv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uaddv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i32:
@@ -354,7 +340,6 @@ define i32 @uaddv_v8i32(ptr %a) {
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i32:
@@ -384,7 +369,6 @@ define i64 @uaddv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uaddv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -434,7 +418,6 @@ define i8 @smaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: smaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -476,7 +459,6 @@ define i8 @smaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: smaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -657,7 +639,6 @@ define i16 @smaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: smaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -687,7 +668,6 @@ define i16 @smaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: smaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -796,7 +776,6 @@ define i32 @smaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: smaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -819,7 +798,6 @@ define i32 @smaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: smaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -887,7 +865,6 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: smaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -942,7 +919,6 @@ define i8 @sminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: sminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -984,7 +960,6 @@ define i8 @sminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: sminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1165,7 +1140,6 @@ define i16 @sminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: sminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1195,7 +1169,6 @@ define i16 @sminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: sminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1304,7 +1277,6 @@ define i32 @sminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: sminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1327,7 +1299,6 @@ define i32 @sminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: sminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1395,7 +1366,6 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: sminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1450,7 +1420,6 @@ define i8 @umaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: umaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1492,7 +1461,6 @@ define i8 @umaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: umaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1673,7 +1641,6 @@ define i16 @umaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: umaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1703,7 +1670,6 @@ define i16 @umaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: umaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1812,7 +1778,6 @@ define i32 @umaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: umaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1835,7 +1800,6 @@ define i32 @umaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: umaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1903,7 +1867,6 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: umaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1958,7 +1921,6 @@ define i8 @uminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2000,7 +1962,6 @@ define i8 @uminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2181,7 +2142,6 @@ define i16 @uminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2211,7 +2171,6 @@ define i16 @uminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2320,7 +2279,6 @@ define i32 @uminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2343,7 +2301,6 @@ define i32 @uminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2411,7 +2368,6 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index 9497ec88e57b4d..b351fec3c87e7c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -13,8 +13,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: srem_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p1.s, vl4
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
@@ -23,7 +21,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sdivr z2.s, p1/m, z2.s, z3.s
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i8:
@@ -61,8 +58,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: srem_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -81,7 +76,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i8:
@@ -139,8 +133,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: srem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -160,26 +152,25 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    mov z3.d, z1.d
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    sunpklo z3.h, z3.b
 ; CHECK-NEXT:    sunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v16i8:
@@ -306,6 +297,7 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q3, [x0]
 ; CHECK-NEXT:    ldr q4, [x1]
 ; CHECK-NEXT:    sunpklo z16.s, z16.h
+; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
 ; CHECK-NEXT:    sunpklo z17.h, z4.b
 ; CHECK-NEXT:    sunpklo z18.h, z3.b
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
@@ -315,9 +307,11 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z17.s, z17.h
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
 ; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    mov z20.d, z3.d
 ; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    sunpklo z20.h, z20.b
 ; CHECK-NEXT:    sunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
@@ -325,32 +319,29 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov z18.d, z4.d
 ; CHECK-NEXT:    sunpklo z20.s, z20.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
+; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    sunpklo z18.h, z18.b
 ; CHECK-NEXT:    sunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
 ; CHECK-NEXT:    sdivr z21.s, p0/m, z21.s, z22.s
 ; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
+; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z6.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z18.b, z5.b, z5.b
+; CHECK-NEXT:    splice z16.h, p0, { z19.h, z20.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    splice z5.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    uzp1 z7.b, z16.b, z16.b
+; CHECK-NEXT:    splice z2.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
 ; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
@@ -539,8 +530,6 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: srem_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -548,7 +537,6 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i16:
@@ -586,8 +574,6 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: srem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -600,12 +586,11 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i16:
@@ -684,15 +669,15 @@ define void @srem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    mov z7.d, z1.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    sunpklo z7.s, z7.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -802,12 +787,9 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i32:
@@ -835,12 +817,9 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: srem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i32:
@@ -946,12 +925,9 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: srem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v1i64:
@@ -974,12 +950,9 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: srem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i64:
@@ -1062,18 +1035,15 @@ define void @srem_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: urem_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i8:
@@ -1111,8 +1081,6 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: urem_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1131,7 +1099,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i8:
@@ -1189,8 +1156,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: urem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1210,26 +1175,25 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    mov z3.d, z1.d
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    uunpklo z3.h, z3.b
 ; CHECK-NEXT:    uunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v16i8:
@@ -1356,6 +1320,7 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q3, [x0]
 ; CHECK-NEXT:    ldr q4, [x1]
 ; CHECK-NEXT:    uunpklo z16.s, z16.h
+; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
 ; CHECK-NEXT:    uunpklo z17.h, z4.b
 ; CHECK-NEXT:    uunpklo z18.h, z3.b
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
@@ -1365,9 +1330,11 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z17.s, z17.h
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
 ; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    mov z20.d, z3.d
 ; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    uunpklo z20.h, z20.b
 ; CHECK-NEXT:    uunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
@@ -1375,32 +1342,29 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov z18.d, z4.d
 ; CHECK-NEXT:    uunpklo z20.s, z20.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
+; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    uunpklo z18.h, z18.b
 ; CHECK-NEXT:    uunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
 ; CHECK-NEXT:    udivr z21.s, p0/m, z21.s, z22.s
 ; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
+; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z6.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z18.b, z5.b, z5.b
+; CHECK-NEXT:    splice z16.h, p0, { z19.h, z20.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    splice z5.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    uzp1 z7.b, z16.b, z16.b
+; CHECK-NEXT:    splice z2.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
 ; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
@@ -1589,8 +1553,6 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: urem_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1598,7 +1560,6 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i16:
@@ -1636,8 +1597,6 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: urem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1650,12 +1609,11 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i16:
@@ -1734,15 +1692,15 @@ define void @urem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    mov z7.d, z1.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    uunpklo z7.s, z7.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -1852,12 +1810,9 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: urem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i32:
@@ -1885,12 +1840,9 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: urem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i32:
@@ -1996,12 +1948,9 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: urem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v1i64:
@@ -2024,12 +1973,9 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: urem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 4ac156c42fda00..9be0b52676b380 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -10,12 +10,9 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -52,11 +49,8 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -109,11 +103,8 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -363,12 +354,9 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -395,12 +383,9 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -437,12 +422,9 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -597,12 +579,9 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -629,12 +608,9 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -727,15 +703,11 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -757,15 +729,11 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:
@@ -789,7 +757,6 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index d0f99211e80fce..7e508cab610a3d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -13,12 +13,9 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: ashr_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i8:
@@ -53,10 +50,7 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: ashr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i8:
@@ -107,10 +101,7 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: ashr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v16i8:
@@ -351,12 +342,9 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: ashr_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i16:
@@ -382,10 +370,7 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: ashr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i16:
@@ -420,10 +405,7 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: ashr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i16:
@@ -568,10 +550,7 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: ashr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i32:
@@ -596,10 +575,7 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: ashr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i32:
@@ -684,10 +660,7 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: ashr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v1i64:
@@ -709,10 +682,7 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: ashr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i64:
@@ -782,13 +752,10 @@ define void @ashr_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: lshr_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i8:
@@ -823,10 +790,7 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: lshr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i8:
@@ -877,10 +841,7 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: lshr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v16i8:
@@ -1120,13 +1081,10 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: lshr_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i16:
@@ -1152,10 +1110,7 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: lshr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i16:
@@ -1190,10 +1145,7 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: lshr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i16:
@@ -1338,10 +1290,7 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: lshr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i32:
@@ -1366,10 +1315,7 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: lshr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i32:
@@ -1454,10 +1400,7 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: lshr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v1i64:
@@ -1479,10 +1422,7 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: lshr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i64:
@@ -1552,12 +1492,9 @@ define void @lshr_v4i64(ptr %a, ptr %b) {
 define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 ; CHECK-LABEL: shl_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i8:
@@ -1582,12 +1519,9 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shl_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i8:
@@ -1622,10 +1556,7 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shl_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i8:
@@ -1676,10 +1607,7 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shl_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v16i8:
@@ -1920,10 +1848,7 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shl_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i16:
@@ -1958,10 +1883,7 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shl_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i16:
@@ -2106,10 +2028,7 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shl_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i32:
@@ -2134,10 +2053,7 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shl_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i32:
@@ -2222,10 +2138,7 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: shl_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v1i64:
@@ -2247,10 +2160,7 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shl_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index e595686cb4975d..d5fe91a5ac8a2c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -12,9 +12,7 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v4i16_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f16:
@@ -196,11 +194,9 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) {
 define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v2i16_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f32:
@@ -222,11 +218,9 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
 define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v4i16_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f32:
@@ -388,7 +382,6 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) {
 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w8, w8, #0xffff
 ; CHECK-NEXT:    ucvtf d0, w8
@@ -411,12 +404,10 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v2i16_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f64:
@@ -702,10 +693,8 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f16:
@@ -731,10 +720,8 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v4i32_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f16:
@@ -773,7 +760,6 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f16:
@@ -920,9 +906,7 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f32:
@@ -944,9 +928,7 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v4i32_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f32:
@@ -1016,11 +998,9 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) {
 define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f64:
@@ -1140,14 +1120,12 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) {
 define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    fmov x9, d1
 ; CHECK-NEXT:    ucvtf h0, x8
 ; CHECK-NEXT:    ucvtf h1, x9
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16:
@@ -1182,7 +1160,6 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f16:
@@ -1238,7 +1215,6 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f16:
@@ -1294,10 +1270,8 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f32:
@@ -1326,7 +1300,6 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f32:
@@ -1413,9 +1386,7 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f64:
@@ -1474,9 +1445,7 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v4i16_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f16:
@@ -1658,10 +1627,8 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v2i16_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f32:
@@ -1683,11 +1650,9 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) {
 define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v4i16_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f32:
@@ -1850,12 +1815,10 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v2i16_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f64:
@@ -2141,10 +2104,8 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f16:
@@ -2170,10 +2131,8 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v4i32_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f16:
@@ -2212,7 +2171,6 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f16:
@@ -2264,9 +2222,7 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f32:
@@ -2288,9 +2244,7 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v4i32_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f32:
@@ -2360,11 +2314,9 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) {
 define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f64:
@@ -2594,14 +2546,12 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) {
 define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    fmov x9, d1
 ; CHECK-NEXT:    scvtf h0, x8
 ; CHECK-NEXT:    scvtf h1, x9
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16:
@@ -2636,7 +2586,6 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f16:
@@ -2674,10 +2623,8 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f32:
@@ -2706,7 +2653,6 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f32:
@@ -2738,9 +2684,7 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 41eb731fd66dfe..1cd2606c3c3ffc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -8,16 +8,12 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -64,16 +60,12 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -148,16 +140,12 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) {
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -510,16 +498,12 @@ define void @select_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -551,16 +535,12 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -607,17 +587,13 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -808,16 +784,12 @@ define void @select_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -849,17 +821,13 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -967,15 +935,11 @@ define void @select_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -997,17 +961,13 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 613543310f2c31..ea49e49272a782 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -19,8 +19,8 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.b, vl2
 ; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    mov z2.b, z0.b[1]
-; CHECK-NEXT:    zip1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    mov z1.b, z0.b[1]
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    st1b { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
@@ -189,9 +189,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x20]
-; CHECK-NEXT:    ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
+; CHECK-NEXT:    ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3]
 ; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    stp q0, q2, [x19]
+; CHECK-NEXT:    stp q0, q1, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 3627390b5edfa9..43767c9eeaa34a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -15,7 +15,6 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    mov z0.s, z1.s[2]
 ; CHECK-NEXT:    add z1.s, z3.s, z3.s
 ; CHECK-NEXT:    add z3.s, z4.s, z4.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    stp q2, q5, [x0, #32]
 ; CHECK-NEXT:    stp q1, q3, [x0]
 ; CHECK-NEXT:    ret
@@ -107,7 +106,6 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    stp q1, q2, [x0, #32]
 ; CHECK-NEXT:    stp q3, q4, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test2:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index 504db6df18ee5a..38d22c1c90ff7b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -10,7 +10,6 @@ define <4 x i8> @load_v4i8(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v4i8:
@@ -79,7 +78,6 @@ define <2 x i16> @load_v2i16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v2i16:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index d4565c4b69c779..1184d4415d0fb2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -14,7 +14,6 @@ define i8 @andv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: andv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -41,7 +40,6 @@ define i8 @andv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: andv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -76,7 +74,6 @@ define i8 @andv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: andv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -211,7 +208,6 @@ define i16 @andv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: andv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -233,7 +229,6 @@ define i16 @andv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: andv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -260,7 +255,6 @@ define i16 @andv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: andv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -347,7 +341,6 @@ define i32 @andv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: andv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -369,7 +362,6 @@ define i32 @andv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: andv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -425,7 +417,6 @@ define i64 @andv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: andv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -475,7 +466,6 @@ define i8 @eorv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: eorv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -502,7 +492,6 @@ define i8 @eorv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: eorv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -537,7 +526,6 @@ define i8 @eorv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: eorv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -672,7 +660,6 @@ define i16 @eorv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: eorv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -694,7 +681,6 @@ define i16 @eorv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: eorv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -721,7 +707,6 @@ define i16 @eorv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: eorv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -808,7 +793,6 @@ define i32 @eorv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: eorv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -830,7 +814,6 @@ define i32 @eorv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: eorv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -886,7 +869,6 @@ define i64 @eorv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: eorv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -936,7 +918,6 @@ define i8 @orv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: orv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -963,7 +944,6 @@ define i8 @orv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: orv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -998,7 +978,6 @@ define i8 @orv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: orv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1133,7 +1112,6 @@ define i16 @orv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: orv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1155,7 +1133,6 @@ define i16 @orv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: orv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1182,7 +1159,6 @@ define i16 @orv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: orv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1269,7 +1245,6 @@ define i32 @orv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: orv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1291,7 +1266,6 @@ define i32 @orv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: orv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1347,7 +1321,6 @@ define i64 @orv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: orv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index c8cea6ebabd484..1dca9880925709 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -43,7 +43,6 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; CHECK-NEXT:    ldr x8, [x8]
 ; CHECK-NEXT:    mov z0.d, p0/m, x8
 ; CHECK-NEXT:  .LBB0_4: // %else2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index 48a642c908bfe6..d66164b146b6c2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -12,13 +12,11 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4i8:
@@ -108,13 +106,11 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
 define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8i8:
@@ -303,13 +299,11 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
 define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v16i8:
@@ -746,8 +740,6 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x0, x8]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    ret
 ;
@@ -1466,7 +1458,6 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    ptrue p0.h, vl4
@@ -1477,7 +1468,6 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f16:
@@ -1520,13 +1510,11 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f16:
@@ -1617,14 +1605,12 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
 define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8f16:
@@ -1814,22 +1800,19 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z1.h, z1.h, #15
-; CHECK-NEXT:    asr z1.h, z1.h, #15
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
-; CHECK-NEXT:    cmpne p1.h, p0/z, z1.h, #0
+; CHECK-NEXT:    asr z1.h, z1.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
+; CHECK-NEXT:    cmpne p1.h, p0/z, z1.h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v16f16:
@@ -2167,13 +2150,11 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f32:
@@ -2218,14 +2199,12 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
 define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f32:
@@ -2312,7 +2291,6 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[3]
 ; CHECK-NEXT:    mov z2.b, z0.b[2]
 ; CHECK-NEXT:    mov x8, #4 // =0x4
@@ -2338,8 +2316,6 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8f32:
@@ -2516,14 +2492,12 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    lsl z0.d, z0.d, #63
 ; CHECK-NEXT:    asr z0.d, z0.d, #63
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f64:
@@ -2568,10 +2542,9 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #2 // =0x2
-; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -2582,9 +2555,7 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f64:
@@ -2680,7 +2651,6 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_zext_v3i32:
@@ -2753,7 +2723,6 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_sext_v3i32:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index 265480b571970f..9b6f571c1e008d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -12,9 +12,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
@@ -68,9 +67,8 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
@@ -161,9 +159,8 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
@@ -589,7 +586,6 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
 define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    ptrue p0.h, vl4
@@ -634,9 +630,8 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
 define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
@@ -694,9 +689,8 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
@@ -796,15 +790,14 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z1.h, z1.h, #15
-; CHECK-NEXT:    asr z1.h, z1.h, #15
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    asr z1.h, z1.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
@@ -952,9 +945,8 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
@@ -1009,7 +1001,6 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov x8, #4 // =0x4
@@ -1122,9 +1113,8 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    lsl z0.d, z0.d, #63
 ; CHECK-NEXT:    asr z0.d, z0.d, #63
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
@@ -1161,10 +1151,9 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
 define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #2 // =0x2
-; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index a33e8537edf4ee..86d00978a53404 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -480,7 +480,6 @@ define <16 x i8> @test_revv16i8(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_revv16i8:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index e07036f2a1acfc..6eb79f24386cf8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -28,9 +28,9 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
@@ -126,43 +126,43 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q4, q5, [x0]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    ldp q1, q6, [x1]
-; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
 ; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
 ; CHECK-NEXT:    fcmne p5.s, p0/z, z2.s, #0.0
 ; CHECK-NEXT:    fcmne p4.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z6.s, #0.0
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z3.s, p4/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z6.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z5.s, p6/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z7.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z18.h, z6.h, z6.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z0.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
-; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z1.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    orr z0.d, z2.d, z0.d
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -335,43 +335,43 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q4, q5, [x0]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    ldp q1, q6, [x1]
-; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
 ; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
 ; CHECK-NEXT:    fcmne p5.s, p0/z, z2.s, #0.0
 ; CHECK-NEXT:    fcmne p4.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z6.s, #0.0
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z3.s, p4/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z6.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z5.s, p6/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z7.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z18.h, z6.h, z6.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z0.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
-; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z1.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z0.d, z2.d, z0.d
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 00a15f4bcd6394..019ab10d6d444f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -36,13 +36,11 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; STREAMING-SVE-NEXT:    uunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
-; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
 ; STREAMING-SVE-NEXT:    uunpklo z4.s, z2.h
@@ -50,9 +48,9 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    uunpklo z6.s, z3.h
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; STREAMING-SVE-NEXT:    mov z5.d, z1.d
+; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z7.s, z0.h
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
 ; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
@@ -67,7 +65,6 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov x0, d0
-; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-SVE-NEXT:    ret
   %1 = zext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
@@ -104,13 +101,11 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; STREAMING-SVE-NEXT:    sunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
-; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
 ; STREAMING-SVE-NEXT:    sunpklo z4.s, z2.h
@@ -118,9 +113,9 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    sunpklo z6.s, z3.h
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; STREAMING-SVE-NEXT:    mov z5.d, z1.d
+; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z7.s, z0.h
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
 ; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
@@ -135,7 +130,6 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov x0, d0
-; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-SVE-NEXT:    ret
   %1 = sext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
index c942f1eca8ebaf..381ae5a0b38414 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
@@ -17,7 +17,6 @@ define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %el0 = extractelement <vscale x 4 x i1> %a, i32 0
   %el1 = extractelement <vscale x 4 x i1> %a, i32 1

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index c34cae12516ed8..5fe86db11b07a5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -14,10 +14,8 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    lsr z0.h, z0.h, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i8:
@@ -51,9 +49,7 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i8:
@@ -103,9 +99,7 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v16i8:
@@ -340,10 +334,8 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i16:
@@ -368,9 +360,7 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i16:
@@ -404,9 +394,7 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i16:
@@ -545,9 +533,7 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i32:
@@ -570,9 +556,7 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i32:
@@ -645,9 +629,7 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v1i64:
@@ -668,9 +650,7 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i64:
@@ -732,10 +712,8 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bswap_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i16:
@@ -774,9 +752,7 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bswap_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i16:
@@ -810,9 +786,7 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bswap_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v8i16:
@@ -954,9 +928,7 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bswap_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i32:
@@ -990,9 +962,7 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bswap_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i32:
@@ -1134,9 +1104,7 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bswap_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v1i64:
@@ -1170,9 +1138,7 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bswap_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index 85ba9640002347..dd7d2264188f46 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -10,10 +10,8 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -59,9 +57,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -127,9 +123,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -460,10 +454,8 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -491,9 +483,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -535,9 +525,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -724,9 +712,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -753,9 +739,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -852,9 +836,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -878,9 +860,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index c7b2575266d65b..27ae572206232e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -9,9 +9,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -38,8 +37,6 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2
 ; CHECK-LABEL: interleave_store_without_splat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -75,13 +72,12 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2)
 ; CHECK-LABEL: interleave_store_legalization:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z2_z3
-; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    mov z2.d, z3.d
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    st2w { z4.s, z5.s }, p0, [x0]
-; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT:    st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: interleave_store_legalization:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index a4cf5d608fed6d..071ab0bc1a7132 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -14,7 +14,6 @@ define <4 x i8> @splat_v4i8(i8 %a) {
 ; CHECK-LABEL: splat_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i8:
@@ -37,7 +36,6 @@ define <8 x i8> @splat_v8i8(i8 %a) {
 ; CHECK-LABEL: splat_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8i8:
@@ -64,7 +62,6 @@ define <16 x i8> @splat_v16i8(i8 %a) {
 ; CHECK-LABEL: splat_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, w0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v16i8:
@@ -135,7 +132,6 @@ define <2 x i16> @splat_v2i16(i16 %a) {
 ; CHECK-LABEL: splat_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i16:
@@ -155,7 +151,6 @@ define <4 x i16> @splat_v4i16(i16 %a) {
 ; CHECK-LABEL: splat_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i16:
@@ -178,7 +173,6 @@ define <8 x i16> @splat_v8i16(i16 %a) {
 ; CHECK-LABEL: splat_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8i16:
@@ -233,7 +227,6 @@ define <2 x i32> @splat_v2i32(i32 %a) {
 ; CHECK-LABEL: splat_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i32:
@@ -253,7 +246,6 @@ define <4 x i32> @splat_v4i32(i32 %a) {
 ; CHECK-LABEL: splat_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i32:
@@ -296,7 +288,6 @@ define <1 x i64> @splat_v1i64(i64 %a) {
 ; CHECK-LABEL: splat_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v1i64:
@@ -316,7 +307,6 @@ define <2 x i64> @splat_v2i64(i64 %a) {
 ; CHECK-LABEL: splat_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, x0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i64:
@@ -358,9 +348,7 @@ define void @splat_v4i64(i64 %a, ptr %b) {
 define <2 x half> @splat_v2f16(half %a) {
 ; CHECK-LABEL: splat_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f16:
@@ -380,9 +368,7 @@ define <2 x half> @splat_v2f16(half %a) {
 define <4 x half> @splat_v4f16(half %a) {
 ; CHECK-LABEL: splat_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4f16:
@@ -404,9 +390,7 @@ define <4 x half> @splat_v4f16(half %a) {
 define <8 x half> @splat_v8f16(half %a) {
 ; CHECK-LABEL: splat_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8f16:
@@ -431,7 +415,6 @@ define <8 x half> @splat_v8f16(half %a) {
 define void @splat_v16f16(half %a, ptr %b) {
 ; CHECK-LABEL: splat_v16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret
@@ -461,9 +444,7 @@ define void @splat_v16f16(half %a, ptr %b) {
 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) {
 ; CHECK-LABEL: splat_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f32:
@@ -482,9 +463,7 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) {
 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) {
 ; CHECK-LABEL: splat_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4f32:
@@ -503,7 +482,6 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) {
 define void @splat_v8f32(float %a, ptr %b) {
 ; CHECK-LABEL: splat_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret
@@ -545,9 +523,7 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) {
 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) {
 ; CHECK-LABEL: splat_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f64:
@@ -564,7 +540,6 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) {
 define void @splat_v4f64(double %a, ptr %b) {
 ; CHECK-LABEL: splat_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 13fcd94ea8a260..409a42e8d478fd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -129,8 +129,8 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
 define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
 ; CHECK-LABEL: store_trunc_v2i256i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d1, [x0, #32]
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    ldr d1, [x0, #32]
 ; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; CHECK-NEXT:    str q0, [x1]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index 9d241f6f927e11..03f377a5986871 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -15,10 +15,9 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
@@ -223,16 +222,16 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
 ; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z17.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z3.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    add z2.b, z2.b, z2.b
@@ -527,49 +526,49 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z24.b, z19.b, z19.b
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z26.b, z22.b, z22.b
-; CHECK-NEXT:    splice z2.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z25.b, z18.b, z18.b
-; CHECK-NEXT:    splice z7.b, p0, { z20.b, z21.b }
-; CHECK-NEXT:    uzp1 z21.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.b, z17.b, z17.b
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.b, z16.b, z16.b
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    uzp1 z19.b, z19.b, z19.b
-; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
-; CHECK-NEXT:    splice z6.b, p0, { z23.b, z24.b }
-; CHECK-NEXT:    uzp1 z18.b, z3.b, z3.b
-; CHECK-NEXT:    splice z3.b, p0, { z25.b, z26.b }
-; CHECK-NEXT:    uzp1 z4.b, z0.b, z0.b
-; CHECK-NEXT:    add z0.b, z2.b, z2.b
-; CHECK-NEXT:    add z7.b, z7.b, z7.b
-; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z2.b, p0, { z18.b, z19.b }
-; CHECK-NEXT:    splice z16.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    uzp1 z18.b, z18.b, z18.b
+; CHECK-NEXT:    uzp1 z7.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z21.b, z21.b, z21.b
+; CHECK-NEXT:    uzp1 z20.b, z20.b, z20.b
+; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT:    uzp1 z17.b, z23.b, z23.b
+; CHECK-NEXT:    uzp1 z16.b, z22.b, z22.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z18.b, p0, { z18.b, z19.b }
+; CHECK-NEXT:    splice z19.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    splice z6.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    add z2.b, z2.b, z2.b
 ; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    add z6.b, z6.b, z6.b
 ; CHECK-NEXT:    add z3.b, z3.b, z3.b
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.b, z1.b, z1.b
-; CHECK-NEXT:    add z1.b, z2.b, z2.b
-; CHECK-NEXT:    add z2.b, z16.b, z16.b
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.b, z4.b, z4.b
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    add z5.b, z18.b, z18.b
+; CHECK-NEXT:    add z1.b, z19.b, z19.b
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.b, z6.b, z6.b
+; CHECK-NEXT:    add z3.b, z7.b, z7.b
+; CHECK-NEXT:    add z4.b, z4.b, z4.b
+; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
@@ -1183,11 +1182,10 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
@@ -1227,10 +1225,9 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
@@ -1282,25 +1279,25 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z19.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -1429,56 +1426,56 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    ldp q16, q17, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q18, [x0, #128]
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q2, q19, [x0, #192]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
-; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
-; CHECK-NEXT:    ldp q18, q22, [x0, #224]
-; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q23, [x0, #32]
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z27.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z25.h, z22.h, z22.h
-; CHECK-NEXT:    uzp1 z26.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z18.h, z23.h, z23.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    splice z6.h, p0, { z24.h, z25.h }
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z26.h, z27.h }
-; CHECK-NEXT:    splice z7.h, p0, { z17.h, z18.h }
-; CHECK-NEXT:    uzp1 z17.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q18, q19, [x0, #192]
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    ldp q20, q21, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
+; CHECK-NEXT:    ldp q22, q23, [x0, #32]
 ; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z3.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z23.h, z23.h, z23.h
+; CHECK-NEXT:    uzp1 z22.h, z22.h, z22.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z17.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z20.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z19.b, z17.b, z17.b
+; CHECK-NEXT:    uzp1 z7.b, z18.b, z18.b
 ; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z0.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT:    splice z4.b, p0, { z5.b, z6.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    add z2.b, z7.b, z7.b
-; CHECK-NEXT:    add z3.b, z4.b, z4.b
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z19.b, z20.b }
+; CHECK-NEXT:    splice z4.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q0, q1, [x1]
+; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    add z3.b, z4.b, z4.b
+; CHECK-NEXT:    add z2.b, z2.b, z2.b
+; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    stp q3, q2, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
@@ -1767,10 +1764,9 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
@@ -1884,16 +1880,16 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
 ; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    add z2.h, z2.h, z2.h
@@ -2027,49 +2023,49 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z24.h, z19.h, z19.h
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z26.h, z22.h, z22.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z25.h, z18.h, z18.h
-; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    uzp1 z21.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
-; CHECK-NEXT:    splice z6.h, p0, { z23.h, z24.h }
-; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z25.h, z26.h }
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    add z0.h, z2.h, z2.h
-; CHECK-NEXT:    add z7.h, z7.h, z7.h
-; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z17.h, z23.h, z23.h
+; CHECK-NEXT:    uzp1 z16.h, z22.h, z22.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z18.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z19.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    add z2.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    add z6.h, z6.h, z6.h
 ; CHECK-NEXT:    add z3.h, z3.h, z3.h
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.h, z1.h, z1.h
-; CHECK-NEXT:    add z1.h, z2.h, z2.h
-; CHECK-NEXT:    add z2.h, z16.h, z16.h
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.h, z4.h, z4.h
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    add z5.h, z18.h, z18.h
+; CHECK-NEXT:    add z1.h, z19.h, z19.h
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.h, z6.h, z6.h
+; CHECK-NEXT:    add z3.h, z7.h, z7.h
+; CHECK-NEXT:    add z4.h, z4.h, z4.h
+; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
@@ -2362,11 +2358,10 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
@@ -2400,11 +2395,10 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
@@ -2444,30 +2438,29 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z19.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z1.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z18.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z0.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    splice z1.s, p0, { z18.s, z19.s }
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
@@ -2525,57 +2518,57 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q5, q6, [x0, #224]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
-; CHECK-NEXT:    ldp q6, q18, [x0, #192]
-; CHECK-NEXT:    uzp1 z16.s, z5.s, z5.s
-; CHECK-NEXT:    ldp q5, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.s, z6.s, z6.s
-; CHECK-NEXT:    ldp q6, q23, [x0, #96]
-; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z26.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z23.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z6.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z22.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z5.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z3.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    splice z0.s, p0, { z24.s, z25.s }
-; CHECK-NEXT:    splice z7.s, p0, { z26.s, z27.s }
-; CHECK-NEXT:    splice z4.s, p0, { z17.s, z18.s }
-; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    ldp q16, q17, [x0, #192]
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
 ; CHECK-NEXT:    splice z5.s, p0, { z5.s, z6.s }
-; CHECK-NEXT:    splice z6.s, p0, { z22.s, z23.s }
-; CHECK-NEXT:    splice z1.s, p0, { z1.s, z2.s }
-; CHECK-NEXT:    uzp1 z16.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z6.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z17.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z18.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z19.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z7.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z6.b, z3.b, z3.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    splice z1.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2733,11 +2726,10 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
@@ -2771,10 +2763,9 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
@@ -2815,25 +2806,25 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z19.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z0.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z18.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    splice z5.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z1.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2915,56 +2906,56 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q16, q17, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q18, [x0, #128]
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q2, q19, [x0, #192]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #224]
-; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q23, [x0, #32]
-; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z26.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z23.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z22.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
-; CHECK-NEXT:    splice z1.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    splice z6.s, p0, { z24.s, z25.s }
-; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
-; CHECK-NEXT:    splice z0.s, p0, { z26.s, z27.s }
-; CHECK-NEXT:    splice z7.s, p0, { z17.s, z18.s }
-; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #192]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #32]
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z3.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z17.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z18.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z20.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z19.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z7.h, z18.h, z18.h
 ; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT:    splice z4.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    add z2.h, z7.h, z7.h
-; CHECK-NEXT:    add z3.h, z4.h, z4.h
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q0, q1, [x1]
+; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    add z3.h, z4.h, z4.h
+; CHECK-NEXT:    add z2.h, z2.h, z2.h
+; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    stp q3, q2, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
@@ -3120,10 +3111,9 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
@@ -3209,16 +3199,16 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z6.s, z1.s, z1.s
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z2.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z17.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z16.s, z1.s, z1.s
 ; CHECK-NEXT:    splice z0.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
-; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z3.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
 ; CHECK-NEXT:    add z0.s, z0.s, z0.s
 ; CHECK-NEXT:    add z1.s, z1.s, z1.s
 ; CHECK-NEXT:    add z2.s, z2.s, z2.s
@@ -3297,49 +3287,49 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z24.s, z19.s, z19.s
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z26.s, z22.s, z22.s
-; CHECK-NEXT:    splice z2.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z17.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z25.s, z18.s, z18.s
-; CHECK-NEXT:    splice z7.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    uzp1 z21.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
 ; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z20.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z6.s, z6.s
-; CHECK-NEXT:    splice z6.s, p0, { z23.s, z24.s }
-; CHECK-NEXT:    uzp1 z18.s, z3.s, z3.s
-; CHECK-NEXT:    splice z3.s, p0, { z25.s, z26.s }
-; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
-; CHECK-NEXT:    add z0.s, z2.s, z2.s
-; CHECK-NEXT:    add z7.s, z7.s, z7.s
-; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    splice z2.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z17.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z16.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z19.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    splice z7.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    add z2.s, z2.s, z2.s
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    add z6.s, z6.s, z6.s
 ; CHECK-NEXT:    add z3.s, z3.s, z3.s
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.s, z1.s, z1.s
-; CHECK-NEXT:    add z1.s, z2.s, z2.s
-; CHECK-NEXT:    add z2.s, z16.s, z16.s
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.s, z4.s, z4.s
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    add z5.s, z18.s, z18.s
+; CHECK-NEXT:    add z1.s, z19.s, z19.s
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.s, z6.s, z6.s
+; CHECK-NEXT:    add z3.s, z7.s, z7.s
+; CHECK-NEXT:    add z4.s, z4.s, z4.s
+; CHECK-NEXT:    add z0.s, z0.s, z0.s
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index ea6123edc8b4c4..d1099b49a45153 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -10,10 +10,8 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8:
@@ -37,8 +35,6 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -68,8 +64,6 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[15]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -158,9 +152,7 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revw z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16:
@@ -179,8 +171,6 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -208,8 +198,6 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -288,8 +276,6 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -313,8 +299,6 @@ define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -385,8 +369,6 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) {
 define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    insr z1.d, x8
@@ -448,11 +430,9 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) {
 define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[3]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.h, h2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16:
@@ -476,11 +456,9 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[7]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.h, h2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16:
@@ -553,11 +531,9 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) {
 define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.s, s2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32:
@@ -577,11 +553,9 @@ define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2)
 define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[3]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.s, s2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32:
@@ -646,11 +620,9 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) {
 define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.d, z0.d[1]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.d, d2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 9c7a3d5046d0e8..37435e35ceabfe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -39,7 +39,6 @@ define <2 x i64> @fixed_vec_zero_constant() {
 ; CHECK-LABEL: fixed_vec_zero_constant:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #0 // =0x0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_vec_zero_constant:
@@ -54,7 +53,6 @@ define <2 x double> @fixed_vec_fp_zero_constant() {
 ; CHECK-LABEL: fixed_vec_fp_zero_constant:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #0 // =0x0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant:

diff  --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
index 91f8f5c2c90d84..d9e8771b5e70af 100644
--- a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
@@ -43,7 +43,6 @@ define i32 @test(<vscale x 32 x i8> %bin.rdx, <vscale x 32 x i8> %bin.rdx2)  {
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %a = sext <vscale x 32 x i8> %bin.rdx to <vscale x 32 x i32>
   %b = sext <vscale x 32 x i8> %bin.rdx2 to <vscale x 32 x i32>

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index 84c15e4fbc33c7..0e7cff4835e720 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -160,13 +160,11 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v1.4s, v1.4s, #31
 ; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
 ; CHECK-NEXT:    and z1.s, z1.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; CHECK-NEXT:    compact z0.s, p0, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
     %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> undef)
     ret <4 x i32> %out
@@ -197,8 +195,6 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    ushll v3.2d, v2.2s, #0
 ; CHECK-NEXT:    ushll2 v4.2d, v2.4s, #0
 ; CHECK-NEXT:    fmov x8, d2

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 8dd433b6f23c68..55e4d56f2cad40 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -104,7 +104,6 @@ define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
 define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
 ; CHECK-LABEL: sve_splat_2xi8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i8> undef, i8 %val, i32 0
@@ -143,7 +142,6 @@ define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
 define <vscale x 2 x i16> @sve_splat_2xi16(i16 %val) {
 ; CHECK-LABEL: sve_splat_2xi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i16> undef, i16 %val, i32 0
@@ -172,7 +170,6 @@ define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
 define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
 ; CHECK-LABEL: sve_splat_2xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 %val, i32 0
@@ -216,7 +213,6 @@ define <vscale x 12 x i32> @sve_splat_12xi32(i32 %val) {
 define <vscale x 2 x i1> @sve_splat_2xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_2xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret
@@ -228,7 +224,6 @@ define <vscale x 2 x i1> @sve_splat_2xi1(i1 %val) {
 define <vscale x 4 x i1> @sve_splat_4xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_4xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
@@ -240,7 +235,6 @@ define <vscale x 4 x i1> @sve_splat_4xi1(i1 %val) {
 define <vscale x 8 x i1> @sve_splat_8xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_8xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    ret
@@ -252,7 +246,6 @@ define <vscale x 8 x i1> @sve_splat_8xi1(i1 %val) {
 define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_16xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
@@ -266,7 +259,6 @@ define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
 define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
@@ -277,7 +269,6 @@ define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
 define <vscale x 4 x bfloat> @splat_nxv4bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x bfloat> undef, bfloat %val, i32 0
@@ -288,7 +279,6 @@ define <vscale x 4 x bfloat> @splat_nxv4bf16(bfloat %val) #0 {
 define <vscale x 2 x bfloat> @splat_nxv2bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv2bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x bfloat> undef, bfloat %val, i32 0
@@ -299,7 +289,6 @@ define <vscale x 2 x bfloat> @splat_nxv2bf16(bfloat %val) #0 {
 define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 ; CHECK-LABEL: splat_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 8 x half> undef, half %val, i32 0
@@ -310,7 +299,6 @@ define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 define <vscale x 4 x half> @splat_nxv4f16(half %val) {
 ; CHECK-LABEL: splat_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x half> undef, half %val, i32 0
@@ -321,7 +309,6 @@ define <vscale x 4 x half> @splat_nxv4f16(half %val) {
 define <vscale x 2 x half> @splat_nxv2f16(half %val) {
 ; CHECK-LABEL: splat_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x half> undef, half %val, i32 0
@@ -332,7 +319,6 @@ define <vscale x 2 x half> @splat_nxv2f16(half %val) {
 define <vscale x 4 x float> @splat_nxv4f32(float %val) {
 ; CHECK-LABEL: splat_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x float> undef, float %val, i32 0
@@ -343,7 +329,6 @@ define <vscale x 4 x float> @splat_nxv4f32(float %val) {
 define <vscale x 2 x float> @splat_nxv2f32(float %val) {
 ; CHECK-LABEL: splat_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x float> undef, float %val, i32 0
@@ -354,7 +339,6 @@ define <vscale x 2 x float> @splat_nxv2f32(float %val) {
 define <vscale x 2 x double> @splat_nxv2f64(double %val) {
 ; CHECK-LABEL: splat_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x double> undef, double %val, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
index dad357c8a0c132..0ec2bc60c5a8a8 100644
--- a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -263,9 +263,7 @@ define i32 @incb_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incb_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    addvl x0, x0, #3
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -284,9 +282,7 @@ define i32 @inch_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: inch_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, all, mul #7
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -305,9 +301,7 @@ define i32 @incw_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incw_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, all, mul #7
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -326,9 +320,7 @@ define i32 @incd_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incd_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, all, mul #7
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -349,9 +341,7 @@ define i32 @decb_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decb_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    addvl x0, x0, #-4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -370,9 +360,7 @@ define i32 @dech_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: dech_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -391,9 +379,7 @@ define i32 @decw_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decw_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -412,9 +398,7 @@ define i32 @decd_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decd_scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %vscale = call i64 @llvm.vscale.i64()
   %mul = mul i64 %vscale, 2

diff  --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 06cd65620d1c9e..ba3bb51b38b475 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -54,7 +54,6 @@ define void @histogram_i32_promote(ptr %base, <vscale x 2 x i64> %indices, <vsca
 ; CHECK-LABEL: histogram_i32_promote:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1w { z2.d }, p0/z, [x0, z0.d, lsl #2]
 ; CHECK-NEXT:    ptrue p1.d
@@ -100,7 +99,6 @@ define void @histogram_i16_2_lane(ptr %base, <vscale x 2 x i64> %indices, <vscal
 ; CHECK-LABEL: histogram_i16_2_lane:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, z0.d, lsl #1]
 ; CHECK-NEXT:    ptrue p1.d
@@ -116,7 +114,6 @@ define void @histogram_i8_2_lane(ptr %base, <vscale x 2 x i64> %indices, <vscale
 ; CHECK-LABEL: histogram_i8_2_lane:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT:    ptrue p1.d

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
index 5cea7536e1f3ca..f97585b78fb3e9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
@@ -77,7 +77,7 @@ define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vs
 ; CHECK-LABEL: test_luti4_lane_i16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -88,7 +88,7 @@ define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <
 ; CHECK-LABEL: test_luti4_lane_f16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -99,7 +99,7 @@ define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %tab
 ; CHECK-LABEL: test_luti4_lane_bf16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
index 9fd1eb616c28c9..b200eb3f23bf2a 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
@@ -8,9 +8,8 @@
 define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: tbl2_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.b, { z1.b, z2.b }, z3.b
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.b, { z0.b, z1.b }, z3.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
@@ -21,9 +20,8 @@ define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unu
 define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
@@ -34,9 +32,8 @@ define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unu
 define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.s, { z0.s, z1.s }, z3.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
@@ -47,9 +44,8 @@ define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unu
 define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d, z1.d }, z3.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
@@ -60,9 +56,8 @@ define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unu
 define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half> %unused, <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_fh:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -73,9 +68,8 @@ define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half>
 define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: tbl2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b,
@@ -86,9 +80,8 @@ define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x float> %unused, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_fs:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.s, { z0.s, z1.s }, z3.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -99,9 +92,8 @@ define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x floa
 define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x double> %unused, <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_fd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
+; CHECK-NEXT:    mov z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d, z1.d }, z3.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
index baadd08e392d05..47709757668b39 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
@@ -15,8 +15,6 @@ declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fclamp.nxv8bf16(<vscale x 8 x bf
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d){
 ; CHECK-LABEL: test_bfclamp_single_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d)
@@ -26,10 +24,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x4_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f){
 ; CHECK-LABEL: test_bfclamp_single_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f)

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
index 0a5a0b7cfcc880..35ee8c8b77b6f1 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
@@ -33,8 +33,6 @@ define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale
 define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
@@ -44,8 +42,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<
 define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
@@ -55,8 +51,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32
 define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
@@ -67,10 +61,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
@@ -80,10 +70,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
@@ -93,10 +79,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
index 8fe0694808c8e9..57e1a1e100db0b 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
@@ -7,8 +7,6 @@
 define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -21,8 +19,6 @@ define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -36,8 +32,6 @@ define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -51,8 +45,6 @@ define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -66,8 +58,6 @@ define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -81,8 +71,6 @@ define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -96,8 +84,6 @@ define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -111,8 +97,6 @@ define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -127,8 +111,6 @@ define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
@@ -142,8 +124,6 @@ define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off14:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
@@ -157,8 +137,6 @@ define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
@@ -172,8 +150,6 @@ define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
@@ -187,8 +163,6 @@ define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
@@ -202,8 +176,6 @@ define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
@@ -217,8 +189,6 @@ define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
@@ -232,8 +202,6 @@ define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
@@ -247,8 +215,6 @@ define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
@@ -266,9 +232,6 @@ define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -283,9 +246,6 @@ define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -300,9 +260,6 @@ define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -317,9 +274,6 @@ define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -334,9 +288,6 @@ define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -351,9 +302,6 @@ define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -368,9 +316,6 @@ define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -385,9 +330,6 @@ define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -402,9 +344,6 @@ define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off24:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
@@ -419,9 +358,6 @@ define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off21:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
@@ -436,9 +372,6 @@ define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
@@ -453,9 +386,6 @@ define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
@@ -470,9 +400,6 @@ define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
@@ -487,9 +414,6 @@ define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale
 define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
@@ -504,9 +428,6 @@ define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
@@ -521,9 +442,6 @@ define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
@@ -538,9 +456,6 @@ define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
@@ -558,10 +473,6 @@ define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -577,10 +488,6 @@ define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -596,10 +503,6 @@ define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -615,10 +518,6 @@ define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -634,10 +533,6 @@ define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -653,10 +548,6 @@ define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -672,10 +563,6 @@ define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -691,10 +578,6 @@ define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -710,10 +593,6 @@ define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
@@ -729,10 +608,6 @@ define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off28:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
@@ -748,10 +623,6 @@ define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
@@ -767,10 +638,6 @@ define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base1 = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
@@ -786,10 +653,6 @@ define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
@@ -805,10 +668,6 @@ define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
@@ -824,10 +683,6 @@ define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3,  <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
@@ -843,10 +698,6 @@ define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
@@ -862,10 +713,6 @@ define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index 0475601f1907eb..15efb8cf1ad5a4 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -42,8 +42,6 @@ define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -53,8 +51,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -64,8 +60,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -75,8 +69,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -86,10 +78,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -99,10 +87,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -112,10 +96,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -125,10 +105,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
index b19b5d871459a8..2164b3161da5c4 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
@@ -8,18 +8,18 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1b { z27.b }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -33,18 +33,18 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -58,18 +58,18 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -83,18 +83,18 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -108,18 +108,18 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -133,18 +133,18 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -158,18 +158,18 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -183,18 +183,18 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
index e7d9c3e6839e43..e46a7da551546f 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
@@ -10,9 +10,9 @@ define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -28,9 +28,9 @@ define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -46,9 +46,9 @@ define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -64,9 +64,9 @@ define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -82,9 +82,9 @@ define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -100,9 +100,9 @@ define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -118,9 +118,9 @@ define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -136,9 +136,9 @@ define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -154,11 +154,11 @@ define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -174,11 +174,11 @@ define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -194,11 +194,11 @@ define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -214,11 +214,11 @@ define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -234,11 +234,11 @@ define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -254,11 +254,11 @@ define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -274,11 +274,11 @@ define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -294,11 +294,11 @@ define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -316,9 +316,9 @@ define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -334,9 +334,9 @@ define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -352,9 +352,9 @@ define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -370,9 +370,9 @@ define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -388,9 +388,9 @@ define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -406,9 +406,9 @@ define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -424,9 +424,9 @@ define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -442,9 +442,9 @@ define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -460,11 +460,11 @@ define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -480,11 +480,11 @@ define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -500,11 +500,11 @@ define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -520,11 +520,11 @@ define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -540,11 +540,11 @@ define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -560,11 +560,11 @@ define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -580,11 +580,11 @@ define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -600,11 +600,11 @@ define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index a39f6cfa3b4c56..65ebd3f7cd0bd4 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -42,8 +42,6 @@ define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -53,8 +51,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -64,8 +60,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -75,8 +69,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -86,10 +78,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -99,10 +87,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -112,10 +96,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -125,10 +105,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
index 198d3c92158466..53436794b5b3cc 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
@@ -97,11 +97,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @uzp_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: uzp_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z27.d, z5.d
-; CHECK-NEXT:    mov z26.d, z4.d
-; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    uzp { z0.d - z3.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z7.d, z5.d
+; CHECK-NEXT:    mov z6.d, z4.d
+; CHECK-NEXT:    mov z5.d, z3.d
+; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    uzp { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzp.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
@@ -204,11 +204,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @zipq_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: zipq_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z27.d, z5.d
-; CHECK-NEXT:    mov z26.d, z4.d
-; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    uzp { z0.q - z3.q }, { z24.q - z27.q }
+; CHECK-NEXT:    mov z7.d, z5.d
+; CHECK-NEXT:    mov z6.d, z4.d
+; CHECK-NEXT:    mov z5.d, z3.d
+; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    uzp { z0.q - z3.q }, { z4.q - z7.q }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzpq.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
index ab70f57b488742..638849605a2cb8 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
@@ -7,7 +7,6 @@ define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -18,7 +17,6 @@ define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -29,7 +27,6 @@ define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -40,7 +37,6 @@ define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -54,7 +50,6 @@ define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -65,7 +60,6 @@ define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -76,7 +70,6 @@ define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -87,7 +80,6 @@ define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -101,7 +93,6 @@ define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -112,7 +103,6 @@ define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -123,7 +113,6 @@ define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -134,7 +123,6 @@ define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -148,7 +136,6 @@ define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -159,7 +146,6 @@ define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -170,7 +156,6 @@ define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -181,7 +166,6 @@ define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -195,7 +179,6 @@ define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -206,7 +189,6 @@ define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -217,7 +199,6 @@ define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -228,7 +209,6 @@ define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -242,7 +222,6 @@ define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -253,7 +232,6 @@ define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -264,7 +242,6 @@ define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -275,7 +252,6 @@ define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -289,7 +265,6 @@ define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -300,7 +275,6 @@ define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -311,7 +285,6 @@ define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -322,7 +295,6 @@ define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -336,7 +308,6 @@ define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -347,7 +318,6 @@ define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -358,7 +328,6 @@ define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -369,7 +338,6 @@ define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0

diff  --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
index d3abc27a53dadc..77415381709d18 100644
--- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
+++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
@@ -9,6 +9,7 @@ define void @"func"(ptr swifterror %0) #0 {
 ; CHECK-NEXT:    b {{\.?}}LBB0_2
 ; CHECK-NEXT:  {{\.?}}LBB0_1:{{.*}}%thirtythree
 ; CHECK-NEXT:  {{.*}}=>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  {{.*}}implicit-def: $x0
 ; CHECK-NEXT:    b {{\.?}}LBB0_1
 ; CHECK-NEXT:  {{\.?}}LBB0_2:{{.*}}%thirtyeight
 ; CHECK-NEXT:    b {{\.?}}LBB0_3

diff  --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 0ad99008655184..dd5ce449bb1d2a 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -203,16 +203,17 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v5.4s, v1.4s, #0.0
-; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
-; CHECK-NEXT:    bsl v4.16b, v0.16b, v2.16b
-; CHECK-NEXT:    fcmlt v1.4s, v2.4s, #0.0
-; CHECK-NEXT:    bic v2.16b, v3.16b, v5.16b
-; CHECK-NEXT:    bic v1.16b, v4.16b, v1.16b
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-NEXT:    bit v1.16b, v0.16b, v3.16b
+; CHECK-NEXT:    mov v3.16b, v4.16b
+; CHECK-NEXT:    bsl v3.16b, v0.16b, v2.16b
+; CHECK-NEXT:    fcmlt v2.4s, v2.4s, #0.0
+; CHECK-NEXT:    bic v1.16b, v1.16b, v5.16b
+; CHECK-NEXT:    bic v2.16b, v3.16b, v2.16b
 ; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
 ; CHECK-NEXT:    xtn v1.4h, v1.4s
-; CHECK-NEXT:    trn1 v1.8b, v2.8b, v1.8b
+; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    trn1 v1.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    str d1, [x0], #8
 ; CHECK-NEXT:    b.ne .LBB1_9
 ; CHECK-NEXT:  // %bb.10: // %middle.block
@@ -352,21 +353,22 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v6.4s, v3.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v7.4s, v4.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v16.4s, v2.4s, #0.0
-; CHECK-NEXT:    fcmlt v17.4s, v3.4s, #0.0
-; CHECK-NEXT:    bsl v5.16b, v0.16b, v2.16b
-; CHECK-NEXT:    bsl v6.16b, v0.16b, v3.16b
-; CHECK-NEXT:    bsl v7.16b, v0.16b, v4.16b
-; CHECK-NEXT:    fcmlt v2.4s, v4.4s, #0.0
-; CHECK-NEXT:    bic v3.16b, v5.16b, v16.16b
-; CHECK-NEXT:    bic v4.16b, v6.16b, v17.16b
-; CHECK-NEXT:    bic v2.16b, v7.16b, v2.16b
+; CHECK-NEXT:    bit v2.16b, v0.16b, v5.16b
+; CHECK-NEXT:    fcmlt v5.4s, v3.4s, #0.0
+; CHECK-NEXT:    bit v3.16b, v0.16b, v6.16b
+; CHECK-NEXT:    mov v6.16b, v7.16b
+; CHECK-NEXT:    bsl v6.16b, v0.16b, v4.16b
+; CHECK-NEXT:    fcmlt v4.4s, v4.4s, #0.0
+; CHECK-NEXT:    bic v2.16b, v2.16b, v16.16b
+; CHECK-NEXT:    bic v3.16b, v3.16b, v5.16b
+; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-NEXT:    bic v4.16b, v6.16b, v4.16b
 ; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    xtn v5.4h, v3.4s
-; CHECK-NEXT:    xtn v6.4h, v4.4s
-; CHECK-NEXT:    xtn v7.4h, v2.4s
-; CHECK-NEXT:    tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b
+; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    xtn v3.4h, v3.4s
+; CHECK-NEXT:    xtn v4.4h, v4.4s
+; CHECK-NEXT:    tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b
 ; CHECK-NEXT:    st1 { v2.s }[2], [x13]
 ; CHECK-NEXT:    str d2, [x0], #12
 ; CHECK-NEXT:    b.ne .LBB2_4
@@ -605,26 +607,27 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v16.4s, v4.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v17.4s, v5.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v18.4s, v2.4s, #0.0
-; CHECK-NEXT:    fcmlt v19.4s, v3.4s, #0.0
-; CHECK-NEXT:    fcmlt v20.4s, v4.4s, #0.0
-; CHECK-NEXT:    bsl v6.16b, v0.16b, v2.16b
-; CHECK-NEXT:    bsl v7.16b, v0.16b, v3.16b
-; CHECK-NEXT:    bsl v16.16b, v0.16b, v4.16b
-; CHECK-NEXT:    bsl v17.16b, v0.16b, v5.16b
-; CHECK-NEXT:    fcmlt v2.4s, v5.4s, #0.0
-; CHECK-NEXT:    bic v3.16b, v6.16b, v18.16b
-; CHECK-NEXT:    bic v4.16b, v7.16b, v19.16b
-; CHECK-NEXT:    bic v5.16b, v16.16b, v20.16b
-; CHECK-NEXT:    bic v2.16b, v17.16b, v2.16b
+; CHECK-NEXT:    bit v2.16b, v0.16b, v6.16b
+; CHECK-NEXT:    fcmlt v6.4s, v3.4s, #0.0
+; CHECK-NEXT:    bit v3.16b, v0.16b, v7.16b
+; CHECK-NEXT:    fcmlt v7.4s, v4.4s, #0.0
+; CHECK-NEXT:    bit v4.16b, v0.16b, v16.16b
+; CHECK-NEXT:    mov v16.16b, v17.16b
+; CHECK-NEXT:    bsl v16.16b, v0.16b, v5.16b
+; CHECK-NEXT:    fcmlt v5.4s, v5.4s, #0.0
+; CHECK-NEXT:    bic v2.16b, v2.16b, v18.16b
+; CHECK-NEXT:    bic v3.16b, v3.16b, v6.16b
+; CHECK-NEXT:    bic v4.16b, v4.16b, v7.16b
+; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-NEXT:    bic v5.16b, v16.16b, v5.16b
 ; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
 ; CHECK-NEXT:    fcvtzs v5.4s, v5.4s
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    xtn v16.4h, v3.4s
-; CHECK-NEXT:    xtn v17.4h, v4.4s
-; CHECK-NEXT:    xtn v18.4h, v5.4s
-; CHECK-NEXT:    xtn v19.4h, v2.4s
-; CHECK-NEXT:    tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    xtn v3.4h, v3.4s
+; CHECK-NEXT:    xtn v4.4h, v4.4s
+; CHECK-NEXT:    xtn v5.4h, v5.4s
+; CHECK-NEXT:    tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
 ; CHECK-NEXT:    str q2, [x0], #16
 ; CHECK-NEXT:    b.ne .LBB3_9
 ; CHECK-NEXT:  // %bb.10: // %middle.block

diff  --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index 184e8fff154b95..0b09a3d9896ea0 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    add x10, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    add x11, x9, #32
-; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
 ; CHECK-BE-NEXT:    add x9, x9, #48
+; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
 ; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
@@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-BE-NEXT:    add x9, x0, x8, lsl #7
 ; CHECK-BE-NEXT:    add x13, x9, #64
+; CHECK-BE-NEXT:    add x10, x9, #112
+; CHECK-BE-NEXT:    add x11, x9, #96
 ; CHECK-BE-NEXT:    add x12, x9, #80
 ; CHECK-BE-NEXT:    add x14, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    ld1 { v16.16b }, [x13]
-; CHECK-BE-NEXT:    add x11, x9, #96
 ; CHECK-BE-NEXT:    add x13, x9, #32
+; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v2.16b }, [x14]
 ; CHECK-BE-NEXT:    ld1 { v17.16b }, [x12]
-; CHECK-BE-NEXT:    add x10, x9, #112
-; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x13]
 ; CHECK-BE-NEXT:    ld1 { v18.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
 ; CHECK-BE-NEXT:    ld1 { v19.16b }, [x10]
+; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
 ; CHECK-BE-NEXT:    add x8, x8, #1
 ; CHECK-BE-NEXT:    cmp x8, #1000
 ; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
@@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    add x10, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    add x11, x9, #32
-; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
 ; CHECK-BE-NEXT:    add x9, x9, #48
+; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
 ; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3

diff  --git a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
index f0856c43daf1d9..59d3a5c97d65ea 100644
--- a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
@@ -6,7 +6,6 @@ define i32 @uaddlv_uaddlp_v8i16(<8 x i16> %0) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uaddlv s0, v0.8h
 ; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %0)
   %3 = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %2)

diff  --git a/llvm/test/CodeGen/AArch64/umul_fix_sat.ll b/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
index e9965fd5319b5b..3eb70ff79d8c6f 100644
--- a/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
@@ -84,8 +84,6 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: vec:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v1.s[1]
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
@@ -98,7 +96,6 @@ define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-NEXT:    csinv w9, w9, wzr, eq
 ; CHECK-NEXT:    fmov s0, w9
 ; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
   ret <2 x i32> %tmp

diff  --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
index 468a33ce5bfcf6..a07e35e6b3ad37 100644
--- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
@@ -282,7 +282,6 @@ define <2 x i64> @fold_urem_v2i64(<2 x i64> %x) {
 define <1 x i64> @fold_urem_v1i64(<1 x i64> %x) {
 ; CHECK-LABEL: fold_urem_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x8, #-3689348814741910324 // =0xcccccccccccccccc
 ; CHECK-NEXT:    mov w10, #10 // =0xa

diff  --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 557aa010b3a7d9..b095f3598276d7 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -510,7 +510,6 @@ define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
 ; CHECK-NEXT:    ld1.b { v0 }[0], [x8]
 ; CHECK-NEXT:    orr x8, x8, #0x1
 ; CHECK-NEXT:    ld1.b { v0 }[4], [x8]
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .loh AdrpLdr Lloh36, Lloh37

diff  --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index 854a2d75d4b864..f6cc7837d44a0b 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -45,10 +45,7 @@ define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
@@ -59,21 +56,16 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret
   %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
@@ -88,20 +80,16 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -119,27 +107,22 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[3]
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -282,20 +265,16 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl cosf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -313,20 +292,16 @@ define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl tanf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl tanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl tanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -344,20 +319,16 @@ define <3 x float> @asin_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl asinf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl asinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl asinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -375,20 +346,16 @@ define <3 x float> @acos_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl acosf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl acosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl acosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -406,20 +373,16 @@ define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl atanf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl atanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl atanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -438,14 +401,10 @@ define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl atan2f
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl atan2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -453,7 +412,6 @@ define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
 ; CHECK-NEXT:    mov s1, v1.s[2]
 ; CHECK-NEXT:    bl atan2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -471,20 +429,16 @@ define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinhf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -501,20 +455,16 @@ define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl coshf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl coshf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl coshf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -532,20 +482,16 @@ define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl tanhf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl tanhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl tanhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -563,20 +509,16 @@ define <3 x float> @exp_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl expf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -594,20 +536,16 @@ define <3 x float> @exp2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl exp2f
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -634,20 +572,16 @@ define <3 x float> @log_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl logf
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -665,20 +599,16 @@ define <3 x float> @log10_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log10f
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -696,20 +626,16 @@ define <3 x float> @log2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log2f
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
index 4d847fb96cbb72..fd0f8fe306bfa6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -29,7 +29,6 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
@@ -39,7 +38,6 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
@@ -57,7 +55,6 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-LABEL: test_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
@@ -67,7 +64,6 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-LABEL: test_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 8473f45f6c803b..1a7a81c321d5c4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -1765,8 +1765,6 @@ entry:
 define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i32_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv d0, v0.4s
 ; CHECK-SD-NEXT:    fmov x0, d0
@@ -1873,8 +1871,6 @@ entry:
 define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-SD-LABEL: add_pair_v4i16_v4i32_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv s0, v0.8h
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -2777,8 +2773,6 @@ entry:
 define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movi v2.2d, #0x00ffff0000ffff
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -3073,8 +3067,6 @@ entry:
 define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v4i8_v4i32_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
@@ -3195,8 +3187,6 @@ entry:
 define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v8i8_v8i16_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv h0, v0.16b
 ; CHECK-SD-NEXT:    umov w0, v0.h[0]
@@ -3531,8 +3521,6 @@ entry:
 define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movi v2.2d, #0x0000ff000000ff
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -4561,8 +4549,6 @@ entry:
 define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-BASE-LABEL: full:
 ; CHECK-SD-BASE:       // %bb.0: // %entry
-; CHECK-SD-BASE-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-SD-BASE-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-BASE-NEXT:    sxtw x8, w3
 ; CHECK-SD-BASE-NEXT:    sxtw x9, w1
 ; CHECK-SD-BASE-NEXT:    ldr d0, [x0]
@@ -4617,8 +4603,6 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-DOT:       // %bb.0: // %entry
 ; CHECK-SD-DOT-NEXT:    ldr d0, [x0]
 ; CHECK-SD-DOT-NEXT:    ldr d1, [x2]
-; CHECK-SD-DOT-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-SD-DOT-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-DOT-NEXT:    sxtw x8, w3
 ; CHECK-SD-DOT-NEXT:    sxtw x9, w1
 ; CHECK-SD-DOT-NEXT:    movi v2.2d, #0000000000000000
@@ -4671,8 +4655,6 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ;
 ; CHECK-GI-LABEL: full:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 def $x3
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    sxtw x8, w3
 ; CHECK-GI-NEXT:    ldr d0, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 7fa416e0dbcd5c..0c40e0534e846d 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -29,7 +29,6 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
@@ -39,7 +38,6 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
@@ -57,7 +55,6 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-LABEL: test_v1i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
@@ -67,7 +64,6 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-LABEL: test_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index 58020d28702b2f..6d830c71366eab 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -18,7 +18,6 @@ declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
 define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_and_v1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[0]
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel w0, w0, w1, lt
@@ -112,7 +111,6 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_or_v1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[0]
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel w0, w0, w1, lt

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
index 215b6e086591dc..2fdbfbb9046963 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -36,7 +36,6 @@ define half @test_v1f16_neutral(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a, float %s) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fadd s0, s1, s0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
@@ -46,8 +45,6 @@ define float @test_v1f32(<1 x float> %a, float %s) nounwind {
 define float @test_v1f32_neutral(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32_neutral:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
   ret float %b

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
index a2e5a8a1b4c46f..28803cdb323c4b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -22,8 +22,6 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call reassoc float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
   ret float %b
@@ -60,15 +58,10 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v5f32(<5 x float> %a) nounwind {
 ; CHECK-LABEL: test_v5f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    movi v5.4s, #128, lsl #24
-; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    mov v5.s[0], v4.s[0]
+; CHECK-NEXT:    movi v5.4s, #128, lsl #24
 ; CHECK-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-NEXT:    mov v5.s[0], v4.s[0]
 ; CHECK-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-NEXT:    fadd v0.4s, v0.4s, v5.4s
 ; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 1164e02a16c9e0..12fc34f2f2b463 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -1,20 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI-FP16
 
 define float @add_HalfS(<2 x float> %bin.rdx)  {
-; CHECK-SD-LABEL: add_HalfS:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    faddp s0, v0.2s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: add_HalfS:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    faddp s0, v0.2s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: add_HalfS:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp s0, v0.2s
+; CHECK-NEXT:    ret
   %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
   ret float %r
 }
@@ -22,7 +16,6 @@ define float @add_HalfS(<2 x float> %bin.rdx)  {
 define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: add_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -42,7 +35,6 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: add_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
 ; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
@@ -53,7 +45,6 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-GI-NOFP16-LABEL: add_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
 ; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -78,7 +69,6 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: add_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fadd h1, h0, h1

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 03db1d0d433d3c..37a3f123fc28de 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -16,7 +16,6 @@ define float @add_HalfS(<2 x float> %bin.rdx)  {
 define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: add_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -369,7 +368,6 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB10_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -501,7 +499,6 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB11_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 1d295a30a994b0..8d5b65958b7e70 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -44,27 +44,9 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-NOFP-SD-LABEL: test_v1f32:
-; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NOFP-SD-NEXT:    ret
-;
-; CHECK-FP-SD-LABEL: test_v1f32:
-; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-FP-SD-NEXT:    ret
-;
-; CHECK-NOFP-GI-LABEL: test_v1f32:
-; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-NOFP-GI-NEXT:    ret
-;
-; CHECK-FP-GI-LABEL: test_v1f32:
-; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-FP-GI-NEXT:    ret
+; CHECK-LABEL: test_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -88,7 +70,6 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -124,7 +105,6 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -448,17 +428,9 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #254, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -527,17 +499,9 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #254, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index ee2af110c84cd5..106ab214a056f6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -44,27 +44,9 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-NOFP-SD-LABEL: test_v1f32:
-; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NOFP-SD-NEXT:    ret
-;
-; CHECK-FP-SD-LABEL: test_v1f32:
-; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-FP-SD-NEXT:    ret
-;
-; CHECK-NOFP-GI-LABEL: test_v1f32:
-; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-NOFP-GI-NEXT:    ret
-;
-; CHECK-FP-GI-LABEL: test_v1f32:
-; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-FP-GI-NEXT:    ret
+; CHECK-LABEL: test_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -88,7 +70,6 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -124,7 +105,6 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -448,17 +428,9 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #252, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -527,17 +499,9 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    mvni v1.8h, #4, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index be61f9b5217952..a810f23d92a34b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -40,27 +40,9 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-NOFP-SD-LABEL: test_v1f32:
-; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NOFP-SD-NEXT:    ret
-;
-; CHECK-FP-SD-LABEL: test_v1f32:
-; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-FP-SD-NEXT:    ret
-;
-; CHECK-NOFP-GI-LABEL: test_v1f32:
-; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-NOFP-GI-NEXT:    ret
-;
-; CHECK-FP-GI-LABEL: test_v1f32:
-; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-FP-GI-NEXT:    ret
+; CHECK-LABEL: test_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -84,7 +66,6 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -408,17 +389,9 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #252, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 300081dc3ec405..f77e6285f978a8 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -44,27 +44,9 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-NOFP-SD-LABEL: test_v1f32:
-; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NOFP-SD-NEXT:    ret
-;
-; CHECK-FP-SD-LABEL: test_v1f32:
-; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-FP-SD-NEXT:    ret
-;
-; CHECK-NOFP-GI-LABEL: test_v1f32:
-; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-NOFP-GI-NEXT:    ret
-;
-; CHECK-FP-GI-LABEL: test_v1f32:
-; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-FP-GI-NEXT:    ret
+; CHECK-LABEL: test_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -88,7 +70,6 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -124,7 +105,6 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -448,17 +428,9 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #124, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -527,17 +499,9 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    mvni v1.8h, #132, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
index e735f670ced0cf..675e584953d942 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
@@ -40,27 +40,9 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-NOFP-SD-LABEL: test_v1f32:
-; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NOFP-SD-NEXT:    ret
-;
-; CHECK-FP-SD-LABEL: test_v1f32:
-; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-FP-SD-NEXT:    ret
-;
-; CHECK-NOFP-GI-LABEL: test_v1f32:
-; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-NOFP-GI-NEXT:    ret
-;
-; CHECK-FP-GI-LABEL: test_v1f32:
-; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
-; CHECK-FP-GI-NEXT:    ret
+; CHECK-LABEL: test_v1f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -84,7 +66,6 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -408,17 +389,9 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
-; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
-; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
-; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
-; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
-; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
-; CHECK-FP-NEXT:    mov x8, sp
-; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
-; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #124, lsl #8
+; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
index ce7ae1e426bdac..4b738bef5fa4d5 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
@@ -23,8 +23,6 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 1.0, <1 x float> %a)
   ret float %b

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b21705c95f3e..2636d5b2a34bb3 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -7,13 +7,11 @@
 define float @mul_HalfS(<2 x float> %bin.rdx)  {
 ; CHECK-SD-LABEL: mul_HalfS:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mul_HalfS:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NEXT:    ret
@@ -24,7 +22,6 @@ define float @mul_HalfS(<2 x float> %bin.rdx)  {
 define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: mul_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -44,7 +41,6 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: mul_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
@@ -53,7 +49,6 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-GI-NOFP16-LABEL: mul_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
 ; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -78,7 +73,6 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: mul_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fmul h1, h0, h1

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
index da75a805952120..9047c1d09a6935 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
@@ -7,13 +7,11 @@
 define float @mul_HalfS(<2 x float> %bin.rdx)  {
 ; CHECK-SD-LABEL: mul_HalfS:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mul_HalfS:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NEXT:    ret
@@ -24,7 +22,6 @@ define float @mul_HalfS(<2 x float> %bin.rdx)  {
 define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: mul_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
-; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -44,7 +41,6 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: mul_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
-; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
@@ -62,7 +58,6 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: mul_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 809a6d6556a7be..9b652704fca704 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -31,7 +31,6 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
@@ -41,7 +40,6 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
@@ -57,31 +55,19 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 }
 
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
-; CHECK-SD-LABEL: test_v1i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov w0, s0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_v1i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_v1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
-; CHECK-SD-LABEL: test_v1i64:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    fmov x0, d0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_v1i64:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_v1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
   ret i64 %b
 }
@@ -209,7 +195,6 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: test_v4i1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]

diff  --git a/llvm/test/CodeGen/AArch64/vector-compress.ll b/llvm/test/CodeGen/AArch64/vector-compress.ll
index 710ea70d678c5b..4f76c92039f1f1 100644
--- a/llvm/test/CodeGen/AArch64/vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/vector-compress.ll
@@ -206,7 +206,6 @@ define <8 x i32> @test_compress_large(<8 x i32> %vec, <8 x i1> %mask) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    ; kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umov.b w9, v2[0]
 ; CHECK-NEXT:    umov.b w10, v2[1]
 ; CHECK-NEXT:    mov x12, sp
@@ -348,7 +347,6 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    shl.4h v1, v1, #15
 ; CHECK-NEXT:    add x8, sp, #8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str h0, [sp, #8]
 ; CHECK-NEXT:    cmlt.4h v1, v1, #0
 ; CHECK-NEXT:    umov.h w9, v1[0]
@@ -380,7 +378,6 @@ define <4 x i4> @test_compress_illegal_element_type(<4 x i4> %vec, <4 x i1> %mas
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    shl.4h v1, v1, #15
 ; CHECK-NEXT:    add x8, sp, #8
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str h0, [sp, #8]
 ; CHECK-NEXT:    cmlt.4h v1, v1, #0
 ; CHECK-NEXT:    umov.h w9, v1[0]

diff  --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
index 063b23275c6167..a4cb06a05968cf 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -21,9 +21,8 @@ define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0
 define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
 ; CHECK-LABEL: test_copysign_v1f32_v1f64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
 ; CHECK-NEXT:    fcvtn v1.2s, v1.2d
+; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
 ; CHECK-NEXT:    bif.8b v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp0 = fptrunc <1 x double> %b to <1 x float>
@@ -41,10 +40,8 @@ define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b)
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
 ; CHECK-NEXT:    fcvtl v1.2d, v1.2s
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp0 = fpext <1 x float> %b to <1 x double>
   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
@@ -55,11 +52,8 @@ define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b)
 ; CHECK-LABEL: test_copysign_v1f64_v1f64:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
   ret <1 x double> %r

diff  --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
index 8f38bdbedc629b..1103cf465e5e3a 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -212,7 +212,6 @@ define <4 x double> @sitofp_v4i8_double(<4 x i8> %a) {
 define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) {
 ; CHECK-LABEL: sitofp_v8i8_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[2]
 ; CHECK-NEXT:    umov w11, v0.b[4]
@@ -389,7 +388,6 @@ define <4 x double> @uitofp_v4i8_double(<4 x i8> %a) {
 define <8 x double> @uitofp_v8i8_double(<8 x i8> %a) {
 ; CHECK-LABEL: uitofp_v8i8_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[2]
 ; CHECK-NEXT:    umov w11, v0.b[4]

diff  --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 5503de2b4c5db9..dad322574965af 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -17,7 +17,6 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    fcvt s0, h0
 ; CHECK-NEXT:    fcvt s1, h1
@@ -36,7 +35,6 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
 define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; CHECK-LABEL: llrint_v4i64_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[2]
 ; CHECK-NEXT:    mov h2, v0.h[1]
 ; CHECK-NEXT:    mov h3, v0.h[3]
@@ -373,7 +371,6 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x8, s0
 ; CHECK-NEXT:    fmov d0, x8

diff  --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 602643264e7be0..105fd65f409b9f 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -2,48 +2,15 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon |\
 ; RUN:   FileCheck %s --check-prefixes=CHECK-i32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-SD
+; RUN:   FileCheck %s --check-prefixes=CHECK-i64
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon \
 ; RUN:   -global-isel -global-isel-abort=2 2>&1 |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i32,CHECK-i32-GI
+; RUN:   FileCheck %s --check-prefixes=CHECK-i32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon \
 ; RUN:   -global-isel -global-isel-abort=2 2>&1 |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI
+; RUN:   FileCheck %s --check-prefixes=CHECK-i64
 
-; CHECK-i32-GI:       warning: Instruction selection used fallback path for lrint_v1f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f16
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v1f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f32
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v1f64
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f64
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f64
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f64
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f64
-; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f64
 
-; CHECK-i64-GI:       warning: Instruction selection used fallback path for lrint_v2f16
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f16
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f16
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f16
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f16
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f32
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f32
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f32
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f32
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f32
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f64
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f64
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f64
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f64
-; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f64
 
 define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v1f16:
@@ -69,7 +36,6 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    fcvt s0, h0
 ; CHECK-i32-NEXT:    fcvt s1, h1
@@ -79,12 +45,10 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, s1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f16:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[1]
 ; CHECK-i64-NEXT:    fcvt s0, h0
 ; CHECK-i64-NEXT:    fcvt s1, h1
@@ -103,7 +67,6 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v4f16:
 ; CHECK-i32:       // %bb.0:
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    fcvt s2, h0
 ; CHECK-i32-NEXT:    mov h3, v0.h[2]
@@ -127,7 +90,6 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v4f16:
 ; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[2]
 ; CHECK-i64-NEXT:    mov h2, v0.h[1]
 ; CHECK-i64-NEXT:    mov h3, v0.h[3]
@@ -755,20 +717,12 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; CHECK-i32-NEXT:    fcvtzs v0.2s, v0.2s
 ; CHECK-i32-NEXT:    ret
 ;
-; CHECK-i64-SD-LABEL: lrint_v1f32:
-; CHECK-i64-SD:       // %bb.0:
-; CHECK-i64-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-i64-SD-NEXT:    frintx s0, s0
-; CHECK-i64-SD-NEXT:    fcvtzs x8, s0
-; CHECK-i64-SD-NEXT:    fmov d0, x8
-; CHECK-i64-SD-NEXT:    ret
-;
-; CHECK-i64-GI-LABEL: lrint_v1f32:
-; CHECK-i64-GI:       // %bb.0:
-; CHECK-i64-GI-NEXT:    frintx s0, s0
-; CHECK-i64-GI-NEXT:    fcvtzs x8, s0
-; CHECK-i64-GI-NEXT:    fmov d0, x8
-; CHECK-i64-GI-NEXT:    ret
+; CHECK-i64-LABEL: lrint_v1f32:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    frintx s0, s0
+; CHECK-i64-NEXT:    fcvtzs x8, s0
+; CHECK-i64-NEXT:    fmov d0, x8
+; CHECK-i64-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
   ret <1 x iXLen> %a
 }
@@ -1015,7 +969,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, d1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
-; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
index 3685e9cf85bd6e..e453d618325225 100644
--- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
@@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT:    fmul v2.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v2.4s, v1.4s, v1.4s
-; CHECK-NEXT:    str q2, [x1, x8]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    str q0, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB0_1
@@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v3.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmla v3.4s, v2.4s, v2.4s
-; CHECK-NEXT:    str q3, [x1, x8]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmla v0.4s, v2.4s, v2.4s
+; CHECK-NEXT:    str q0, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB1_1
@@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:    add x9, x1, x8
 ; CHECK-NEXT:    add x8, x8, #32
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fmul v4.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v4.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmul v5.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmla v5.4s, v3.4s, v3.4s
-; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x9]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v2.4s
+; CHECK-NEXT:    fmla v1.4s, v3.4s, v3.4s
+; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x9]
 ; CHECK-NEXT:    b.ne .LBB2_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2,
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x9]
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
 ; CHECK-NEXT:    ld2 { v2.4s, v3.4s }, [x10]
-; CHECK-NEXT:    fmul v4.4s, v2.4s, v0.4s
-; CHECK-NEXT:    fmla v4.4s, v1.4s, v3.4s
-; CHECK-NEXT:    str q4, [x2], #16
+; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    str q0, [x2], #16
 ; CHECK-NEXT:    b.ne .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:  .LBB4_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT:    fmul v2.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v2.4s, v1.4s, v1.4s
-; CHECK-NEXT:    str q2, [x1, x8]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    str q0, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB4_1
@@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:  .LBB5_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v3.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmla v3.4s, v2.4s, v2.4s
-; CHECK-NEXT:    str q3, [x1, x8]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmla v0.4s, v2.4s, v2.4s
+; CHECK-NEXT:    str q0, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB5_1
@@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:    add x9, x1, x8
 ; CHECK-NEXT:    add x8, x8, #32
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fmul v4.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v4.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmul v5.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmla v5.4s, v3.4s, v3.4s
-; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x9]
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v2.4s
+; CHECK-NEXT:    fmla v1.4s, v3.4s, v3.4s
+; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x9]
 ; CHECK-NEXT:    b.ne .LBB6_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) {
 ; CHECK:       .Lfunc_begin8:
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:    ldp q0, q2, [x0]
-; CHECK-NEXT:    ldp q3, q4, [x0, #64]
-; CHECK-NEXT:    ldp q5, q6, [x0, #32]
-; CHECK-NEXT:    ldp q7, q16, [x0, #96]
-; CHECK-NEXT:    mov v0.h[5], v2.h[4]
-; CHECK-NEXT:    zip1 v2.8h, v3.8h, v4.8h
-; CHECK-NEXT:    zip1 v3.8h, v5.8h, v6.8h
-; CHECK-NEXT:    mov v7.h[5], v16.h[4]
-; CHECK-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-NEXT:    uzp1 v1.4s, v3.4s, v7.4s
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #64]
+; CHECK-NEXT:    ldp q4, q5, [x0, #32]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    mov v0.h[5], v1.h[4]
+; CHECK-NEXT:    zip1 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT:    zip1 v2.8h, v4.8h, v5.8h
+; CHECK-NEXT:    mov v6.h[5], v7.h[4]
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v6.4s
 ; CHECK-NEXT:    zip2 v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    st2 { v0.2s, v1.2s }, [x0]
 ; CHECK-NEXT:    str q2, [x0, #64]
@@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    ldr q2, [x2]
 ; CHECK-NEXT:    ldr q3, [x4]
 ; CHECK-NEXT:    ldr q4, [x5]
-; CHECK-NEXT:    ldr q2, [x2]
 ; CHECK-NEXT:    ldr q5, [x3]
 ; CHECK-NEXT:    trn1 v16.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    trn2 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ldr q6, [x6]
 ; CHECK-NEXT:    ldr q7, [x7]
 ; CHECK-NEXT:    trn1 v17.8h, v3.8h, v4.8h
-; CHECK-NEXT:    trn2 v1.8h, v3.8h, v4.8h
 ; CHECK-NEXT:    trn1 v18.8h, v2.8h, v5.8h
+; CHECK-NEXT:    trn2 v1.8h, v3.8h, v4.8h
 ; CHECK-NEXT:    trn2 v2.8h, v2.8h, v5.8h
 ; CHECK-NEXT:    trn1 v19.8h, v6.8h, v7.8h
 ; CHECK-NEXT:    trn2 v3.8h, v6.8h, v7.8h
 ; CHECK-NEXT:    trn1 v4.4s, v16.4s, v17.4s
-; CHECK-NEXT:    trn1 v6.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    trn2 v16.4s, v16.4s, v17.4s
+; CHECK-NEXT:    trn1 v6.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    trn1 v5.4s, v18.4s, v19.4s
 ; CHECK-NEXT:    trn1 v7.4s, v2.4s, v3.4s
@@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ext v3.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT:    ext v6.16b, v1.16b, v2.16b, #12
+; CHECK-NEXT:    ext v4.16b, v1.16b, v2.16b, #12
 ; CHECK-NEXT:    zip2 v3.4s, v0.4s, v3.4s
+; CHECK-NEXT:    zip2 v4.4s, v1.4s, v4.4s
 ; CHECK-NEXT:    mov v3.s[0], v0.s[0]
 ; CHECK-NEXT:    ext v0.16b, v2.16b, v0.16b, #12
-; CHECK-NEXT:    zip2 v4.4s, v1.4s, v6.4s
 ; CHECK-NEXT:    mov v4.s[0], v1.s[0]
 ; CHECK-NEXT:    zip2 v5.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    mov v5.s[0], v2.s[0]

diff  --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
index 3eb74f8394ec4e..8ab7851061f481 100644
--- a/llvm/test/CodeGen/AArch64/win64-fpowi.ll
+++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
@@ -62,13 +62,10 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-NEXT:    fmov d1, d8
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    fmov d1, d8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    .seh_startepilogue
 ; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
@@ -98,21 +95,16 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-NEXT:    .seh_save_freg d8, 40
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    scvtf s8, w0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    fmov s1, s8
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    fmov s1, s8
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    .seh_startepilogue
 ; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
 ; CHECK-NEXT:    .seh_save_freg d8, 40

diff  --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll
index aaf4cad6087403..864735eab6b4ae 100644
--- a/llvm/test/CodeGen/AArch64/win64_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll
@@ -178,7 +178,6 @@ define void @vla(i32, ptr, ...) local_unnamed_addr {
 ; CHECK-NEXT:    add x29, sp, #40
 ; CHECK-NEXT:    .seh_add_fp 40
 ; CHECK-NEXT:    .seh_endprologue
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x8, x29, #24
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    mov x19, x1

diff  --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
index 8a4d6b8c7b789f..7d1fa47e32da1e 100644
--- a/llvm/test/CodeGen/AArch64/xtn.ll
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -23,7 +23,6 @@ entry:
 define i8 @xtn_i64_to_i8(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i8
@@ -33,7 +32,6 @@ entry:
 define i8 @xtn_i128_to_i8(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i8
@@ -52,7 +50,6 @@ entry:
 define i16 @xtn_i64_to_i16(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i16
@@ -62,7 +59,6 @@ entry:
 define i16 @xtn_i128_to_i16(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i16
@@ -72,7 +68,6 @@ entry:
 define i32 @xtn_i64_to_i32(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i32
@@ -82,7 +77,6 @@ entry:
 define i32 @xtn_i128_to_i32(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i32
@@ -131,14 +125,12 @@ define <2 x i8> @xtn_v2i128_v2i8(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i8>
@@ -169,14 +161,12 @@ define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i16>
@@ -198,14 +188,12 @@ define <2 x i32> @xtn_v2i128_v2i32(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i32>
@@ -232,7 +220,6 @@ entry:
 define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
 ; CHECK-LABEL: xtn_v3i16_v3i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    umov w1, v0.h[1]
 ; CHECK-NEXT:    umov w2, v0.h[2]
@@ -267,9 +254,6 @@ entry:
 define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    xtn v1.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
@@ -283,9 +267,6 @@ define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x1, d1
 ; CHECK-GI-NEXT:    fmov x2, d2
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <3 x i64> %a to <3 x i8>
@@ -305,9 +286,6 @@ entry:
 define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
@@ -321,7 +299,6 @@ define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <3 x i64> %a to <3 x i16>
@@ -331,9 +308,6 @@ entry:
 define <3 x i32> @xtn_v3i64_v3i32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-SD-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 0d5010113ce0b2..839214a7ccd984 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -27,7 +27,6 @@ entry:
 define i64 @zext_i8_to_i64(i8 %a) {
 ; CHECK-LABEL: zext_i8_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0xff
 ; CHECK-NEXT:    ret
 entry:
@@ -58,7 +57,6 @@ entry:
 define i64 @zext_i16_to_i64(i16 %a) {
 ; CHECK-LABEL: zext_i16_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0xffff
 ; CHECK-NEXT:    ret
 entry:
@@ -99,7 +97,6 @@ entry:
 define i64 @zext_i10_to_i64(i10 %a) {
 ; CHECK-LABEL: zext_i10_to_i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0x3ff
 ; CHECK-NEXT:    ret
 entry:
@@ -237,7 +234,6 @@ define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w1
 ; CHECK-SD-NEXT:    mov v0.h[2], w2
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i8_v3i16:
@@ -248,7 +244,6 @@ define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    and w8, w2, #0xff
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <3 x i8> %a to <3 x i16>
@@ -293,16 +288,10 @@ define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v1.2d, v3.2s, #0
 ; CHECK-SD-NEXT:    mov v2.b[0], v1.b[0]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i8_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    and x8, x0, #0xff
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    and x10, x2, #0xff
@@ -323,7 +312,6 @@ define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
@@ -343,15 +331,11 @@ define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    ushll v2.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ushll v0.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll2 v2.2d, v2.4s, #0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -369,10 +353,8 @@ define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ushll v3.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    ushll2 v2.2d, v0.4s, #0
-; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d0, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i32_v3i64:
@@ -396,7 +378,6 @@ define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w1
 ; CHECK-SD-NEXT:    mov v0.h[2], w2
 ; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i10_v3i16:
@@ -407,7 +388,6 @@ define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-GI-NEXT:    and w8, w2, #0x3ff
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <3 x i10> %a to <3 x i16>
@@ -443,8 +423,8 @@ define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-LABEL: zext_v3i10_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
-; CHECK-SD-NEXT:    fmov s1, w2
 ; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT:    fmov s1, w2
 ; CHECK-SD-NEXT:    dup v2.2d, x8
 ; CHECK-SD-NEXT:    mov v0.s[1], w1
 ; CHECK-SD-NEXT:    ushll v3.2d, v1.2s, #0
@@ -452,15 +432,10 @@ define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-SD-NEXT:    and v2.8b, v3.8b, v2.8b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i10_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    and x8, x0, #0x3ff
 ; CHECK-GI-NEXT:    and x9, x1, #0x3ff
 ; CHECK-GI-NEXT:    and x10, x2, #0x3ff


        


More information about the llvm-commits mailing list