[llvm-branch-commits] [llvm] 4fc092e - Revert " [AArch64][SME] Enable subreg liveness tracking for AArch64 (#92142)"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 14 10:28:27 PDT 2024
Author: Florian Mayer
Date: 2024-06-14T10:28:22-07:00
New Revision: 4fc092e8e5e92f6906e20c3e6c4fc234c6c76841
URL: https://github.com/llvm/llvm-project/commit/4fc092e8e5e92f6906e20c3e6c4fc234c6c76841
DIFF: https://github.com/llvm/llvm-project/commit/4fc092e8e5e92f6906e20c3e6c4fc234c6c76841.diff
LOG: Revert " [AArch64][SME] Enable subreg liveness tracking for AArch64 (#92142)"
This reverts commit 0113f26fad00e4798883b02eb7a049ea545a13de.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
llvm/test/CodeGen/AArch64/arm64-dup.ll
llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
llvm/test/CodeGen/AArch64/arm64-ld1.ll
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
llvm/test/CodeGen/AArch64/arm64-tbl.ll
llvm/test/CodeGen/AArch64/arm64-zip.ll
llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
llvm/test/CodeGen/AArch64/bf16-shuffle.ll
llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
llvm/test/CodeGen/AArch64/extract-vector-elt.ll
llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
llvm/test/CodeGen/AArch64/fptoi.ll
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
llvm/test/CodeGen/AArch64/insert-subvector.ll
llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
llvm/test/CodeGen/AArch64/seqpairspill.mir
llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
llvm/test/CodeGen/AArch64/shuffles.ll
llvm/test/CodeGen/AArch64/shufflevector.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
llvm/test/CodeGen/AArch64/sve-merging-stores.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
llvm/test/CodeGen/AArch64/tbl-loops.ll
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
llvm/test/CodeGen/AArch64/vldn_shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 9912190e1bced..7ef7a89b5749f 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -149,7 +149,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
- bool enableSubRegLiveness() const override { return true; }
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index 444f579f23242..a1712a5ec7a27 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: casp x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: casp x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspa x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspa x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspl x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspl x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index 62af028defde5..ee5fbe39b4492 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: casp x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: casp x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspa x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspa x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspl x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspl x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3552,7 +3552,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
; -O1: ccmp x4, x6, #0, eq
@@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lt
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, ge
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, lo
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
@@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: cmp x2, x4
-; -O1: csel x9, x5, x3, hs
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x5, x7
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index f043f99327308..83e383f335637 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: casp x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: casp x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspa x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspa x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspl x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspl x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index df7b57e7e18f4..0c3ed9b0f1de0 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -576,7 +576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: adds x9, x5, x3
+; -O1: adds x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: subs x9, x5, x3
+; -O1: subs x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
+; -O1: and x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: casp x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: casp x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspa x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspa x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspl x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspl x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: and x8, x4, x2
-; -O1: and x9, x5, x3
-; -O1: mvn x8, x8
-; -O1: mvn x9, x9
-; -O1: caspal x4, x5, x8, x9, [x0]
+; -O1: and x9, x7, x3
+; -O1: mvn x10, x8
+; -O1: mvn x11, x9
+; -O1: caspal x4, x5, x10, x11, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: orr x8, x4, x2
-; -O1: orr x9, x5, x3
+; -O1: orr x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3593,7 +3593,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
; -O1: eor x8, x4, x2
-; -O1: eor x9, x5, x3
+; -O1: eor x9, x7, x3
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
; -O1: ccmp x5, x7, #0, eq
@@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4141,8 +4141,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lt
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lt
; -O1: csel x8, x4, x2, lt
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
;
; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4705,8 +4705,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
;
; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, ge
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, ge
; -O1: csel x8, x4, x2, ge
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, lo
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, lo
; -O1: csel x8, x4, x2, lo
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: casp x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspa x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspl x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
@@ -5938,8 +5938,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
;
; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
-; -O1: cmp x3, x5
-; -O1: csel x9, x5, x3, hs
+; -O1: cmp x3, x7
+; -O1: csel x9, x7, x3, hs
; -O1: csel x8, x4, x2, hs
; -O1: caspal x4, x5, x8, x9, [x0]
; -O1: cmp x4, x6
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index 80310a11add69..1fe63c9be8c62 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -53,6 +53,10 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap:
; CHECK-CAS-O1: // %bb.0:
+; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0]
; CHECK-CAS-O1-NEXT: mov v0.d[0], x2
; CHECK-CAS-O1-NEXT: mov v0.d[1], x3
@@ -176,6 +180,10 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
; CHECK-CAS-O1: // %bb.0:
+; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0]
; CHECK-CAS-O1-NEXT: mov v0.d[0], x2
; CHECK-CAS-O1-NEXT: mov v0.d[1], x3
@@ -299,6 +307,10 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire:
; CHECK-CAS-O1: // %bb.0:
+; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0]
; CHECK-CAS-O1-NEXT: mov v0.d[0], x2
; CHECK-CAS-O1-NEXT: mov v0.d[1], x3
@@ -422,6 +434,10 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic:
; CHECK-CAS-O1: // %bb.0:
+; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0]
; CHECK-CAS-O1-NEXT: mov v0.d[0], x2
; CHECK-CAS-O1-NEXT: mov v0.d[1], x3
@@ -642,6 +658,10 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_return:
; CHECK-CAS-O1: // %bb.0:
+; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0]
; CHECK-CAS-O1-NEXT: mov x0, x2
; CHECK-CAS-O1-NEXT: mov x1, x3
diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
index 7141f53802bff..07fbe5d7310f6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
@@ -27,8 +27,9 @@ BB:
define void @f_undef_15(<8 x i64> %a, ptr %dst) {
; CHECK-LABEL: f_undef_15:
; CHECK: // %bb.0: // %BB
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $q0_q1
; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8], #32
; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8]
; CHECK-NEXT: add x8, x0, #64
@@ -45,17 +46,19 @@ BB:
define void @f_undef_1(<8 x i64> %a, ptr %dst) {
; CHECK-LABEL: f_undef_1:
; CHECK: // %bb.0: // %BB
-; CHECK-NEXT: mov v4.16b, v2.16b
-; CHECK-NEXT: mov v5.16b, v0.16b
+; CHECK-NEXT: mov v16.16b, v0.16b
+; CHECK-NEXT: mov v5.16b, v2.16b
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4
; CHECK-NEXT: mov x8, x0
-; CHECK-NEXT: mov v6.16b, v0.16b
; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8], #32
-; CHECK-NEXT: mov v5.16b, v4.16b
+; CHECK-NEXT: mov v4.16b, v3.16b
+; CHECK-NEXT: mov v17.16b, v16.16b
+; CHECK-NEXT: mov v6.16b, v5.16b
+; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x8], #32
; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x8]
; CHECK-NEXT: add x8, x0, #64
-; CHECK-NEXT: st2 { v4.2d, v5.2d }, [x8]
-; CHECK-NEXT: mov v4.16b, v3.16b
+; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8]
; CHECK-NEXT: add x8, x0, #96
; CHECK-NEXT: st2 { v3.2d, v4.2d }, [x8]
; CHECK-NEXT: ret
@@ -70,10 +73,11 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
; CHECK-LABEL: noundefs:
; CHECK: // %bb.0: // %BB
; CHECK-NEXT: mov v5.16b, v2.16b
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3
; CHECK-NEXT: mov v4.16b, v0.16b
-; CHECK-NEXT: mov v2.16b, v3.16b
+; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0]
+; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0]
; CHECK-NEXT: ret
BB:
%S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 undef, i32 14, i32 7, i32 15>
@@ -85,10 +89,11 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
; CHECK-LABEL: undefs:
; CHECK: // %bb.0: // %BB
; CHECK-NEXT: mov v5.16b, v2.16b
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3
; CHECK-NEXT: mov v4.16b, v0.16b
-; CHECK-NEXT: mov v2.16b, v3.16b
+; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0]
+; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0]
; CHECK-NEXT: ret
BB:
%S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index f0fcafa5302e6..3c8aca5145261 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -146,11 +146,11 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v2, #0000000000000000
; CHECK-NEXT: uaddlv.4s d1, v0
+; CHECK-NEXT: str d2, [x0, #16]
; CHECK-NEXT: mov.d v0[0], v1[0]
-; CHECK-NEXT: movi.2d v1, #0000000000000000
; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: str d1, [x0, #16]
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
@@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
; CHECK-NEXT: uaddlv.8h s0, v0
; CHECK-NEXT: mov.h v1[0], v0[0]
-; CHECK-NEXT: ushll.4s v0, v1, #0
-; CHECK-NEXT: ucvtf.4s v0, v0
-; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ushll.4s v1, v1, #0
+; CHECK-NEXT: ucvtf.4s v1, v1
+; CHECK-NEXT: str q1, [x0]
; CHECK-NEXT: ret
entry:
@@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: uaddlv.8h s0, v0
; CHECK-NEXT: mov.h v1[0], v0[0]
-; CHECK-NEXT: ushll.4s v0, v1, #0
-; CHECK-NEXT: ucvtf.4s v0, v0
-; CHECK-NEXT: st1.s { v0 }[2], [x8]
-; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ushll.4s v1, v1, #0
+; CHECK-NEXT: ucvtf.4s v1, v1
+; CHECK-NEXT: st1.s { v1 }[2], [x8]
+; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: ret
entry:
@@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
; CHECK-NEXT: stp q0, q0, [x0, #32]
; CHECK-NEXT: mov.h v2[0], v1[0]
; CHECK-NEXT: bic.4h v2, #255, lsl #8
-; CHECK-NEXT: ushll.4s v1, v2, #0
-; CHECK-NEXT: ucvtf.4s v1, v1
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ushll.4s v2, v2, #0
+; CHECK-NEXT: ucvtf.4s v2, v2
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
index 75a96be9b435e..7f20b5e5ee4df 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
@@ -24,6 +24,8 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1
define void @test_wsr128(i128 noundef %v) #0 {
; CHECK-LE-LABEL: test_wsr128:
; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-LE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; CHECK-LE-NEXT: msrr S1_2_C3_C4_5, x0, x1
; CHECK-LE-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 4a84c673af8cf..37c61d0a4a0fb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -43,6 +43,10 @@ define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
;
; LSE-LABEL: val_compare_and_swap:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT: caspa x2, x3, x4, x5, [x0]
; LSE-NEXT: mov x0, x2
; LSE-NEXT: mov x1, x3
@@ -90,6 +94,10 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) {
;
; LSE-LABEL: val_compare_and_swap_seqcst:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT: caspal x2, x3, x4, x5, [x0]
; LSE-NEXT: mov x0, x2
; LSE-NEXT: mov x1, x3
@@ -137,6 +145,10 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) {
;
; LSE-LABEL: val_compare_and_swap_release:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT: caspl x2, x3, x4, x5, [x0]
; LSE-NEXT: mov x0, x2
; LSE-NEXT: mov x1, x3
@@ -184,6 +196,10 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
;
; LSE-LABEL: val_compare_and_swap_monotonic:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT: casp x2, x3, x4, x5, [x0]
; LSE-NEXT: mov x0, x2
; LSE-NEXT: mov x1, x3
@@ -235,7 +251,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) {
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
-; LSE-NEXT: and x8, x5, x3
+; LSE-NEXT: and x8, x7, x3
; LSE-NEXT: and x9, x4, x2
; LSE-NEXT: mvn x10, x9
; LSE-NEXT: mvn x11, x8
@@ -295,7 +311,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: orr x8, x4, x2
-; LSE-NEXT: orr x9, x5, x3
+; LSE-NEXT: orr x9, x7, x3
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
@@ -352,7 +368,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: adds x8, x4, x2
-; LSE-NEXT: adc x9, x5, x3
+; LSE-NEXT: adc x9, x7, x3
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
@@ -408,7 +424,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: subs x8, x4, x2
-; LSE-NEXT: sbc x9, x5, x3
+; LSE-NEXT: sbc x9, x7, x3
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
@@ -468,8 +484,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: cmp x2, x4
-; LSE-NEXT: sbcs xzr, x3, x5
-; LSE-NEXT: csel x9, x5, x3, ge
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, ge
; LSE-NEXT: csel x8, x4, x2, ge
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
@@ -530,8 +546,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: cmp x2, x4
-; LSE-NEXT: sbcs xzr, x3, x5
-; LSE-NEXT: csel x9, x5, x3, lt
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, lt
; LSE-NEXT: csel x8, x4, x2, lt
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
@@ -592,8 +608,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: cmp x2, x4
-; LSE-NEXT: sbcs xzr, x3, x5
-; LSE-NEXT: csel x9, x5, x3, hs
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, hs
; LSE-NEXT: csel x8, x4, x2, hs
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
@@ -654,8 +670,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) {
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
; LSE-NEXT: cmp x2, x4
-; LSE-NEXT: sbcs xzr, x3, x5
-; LSE-NEXT: csel x9, x5, x3, lo
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, lo
; LSE-NEXT: csel x8, x4, x2, lo
; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
@@ -698,8 +714,8 @@ define i128 @atomic_load_seq_cst(ptr %p) {
;
; LSE-LABEL: atomic_load_seq_cst:
; LSE: // %bb.0:
-; LSE-NEXT: mov x2, #0 // =0x0
-; LSE-NEXT: mov x3, #0 // =0x0
+; LSE-NEXT: mov x2, #0
+; LSE-NEXT: mov x3, #0
; LSE-NEXT: caspal x2, x3, x2, x3, [x0]
; LSE-NEXT: mov x0, x2
; LSE-NEXT: mov x1, x3
@@ -731,8 +747,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) {
;
; LSE-LABEL: atomic_load_relaxed:
; LSE: // %bb.0:
-; LSE-NEXT: mov x0, #0 // =0x0
-; LSE-NEXT: mov x1, #0 // =0x0
+; LSE-NEXT: mov x0, #0
+; LSE-NEXT: mov x1, #0
; LSE-NEXT: casp x0, x1, x0, x1, [x2]
; LSE-NEXT: ret
%r = load atomic i128, ptr %p monotonic, align 16
@@ -763,7 +779,9 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) {
;
; LSE-LABEL: atomic_store_seq_cst:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT: .LBB14_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: mov x6, x4
@@ -803,7 +821,9 @@ define void @atomic_store_release(i128 %in, ptr %p) {
;
; LSE-LABEL: atomic_store_release:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT: .LBB15_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: mov x6, x4
@@ -843,7 +863,9 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) {
;
; LSE-LABEL: atomic_store_relaxed:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT: .LBB16_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: mov x6, x4
@@ -899,6 +921,10 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) {
;
; LSE-LABEL: cmpxchg_dead:
; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT: casp x2, x3, x4, x5, [x0]
; LSE-NEXT: ret
cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 979a8b16f4217..2bf5419e54830 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -463,7 +463,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -480,7 +482,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
%r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -500,13 +504,14 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
; CHECK-GI-LABEL: disguised_dup:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI37_1
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_1]
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
-; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v1
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_0]
-; CHECK-GI-NEXT: tbl.16b v1, { v0, v1 }, v1
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
+; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x0]
-; CHECK-GI-NEXT: str q1, [x1]
+; CHECK-GI-NEXT: str q2, [x1]
; CHECK-GI-NEXT: ret
%shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 0>
%dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index fc469a3169deb..628fb550a0532 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -5490,14 +5490,18 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5509,14 +5513,18 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5531,14 +5539,18 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>,
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5550,14 +5562,18 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5572,14 +5588,18 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5591,15 +5611,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5614,14 +5638,18 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>,
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5633,15 +5661,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5656,14 +5688,18 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>,
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5675,15 +5711,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5698,14 +5738,18 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>,
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5717,15 +5761,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5740,14 +5788,18 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>,
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5759,15 +5811,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5782,14 +5838,18 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>,
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5801,15 +5861,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5824,14 +5888,18 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>,
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5843,15 +5911,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5866,14 +5938,18 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5885,15 +5961,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5908,14 +5988,18 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -5927,15 +6011,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -5950,14 +6038,18 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -5969,15 +6061,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -5992,14 +6088,20 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -6011,14 +6113,20 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A,
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -6033,14 +6141,20 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6052,14 +6166,20 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6074,14 +6194,20 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #6
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6093,15 +6219,21 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A,
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6116,14 +6248,20 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #6
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6135,15 +6273,21 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A,
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6158,14 +6302,20 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #12
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6177,15 +6327,21 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A,
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6200,14 +6356,20 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #12
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6219,15 +6381,21 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A,
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6242,14 +6410,20 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #24
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6261,15 +6435,21 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A,
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6284,14 +6464,20 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6303,15 +6489,21 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A,
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6326,14 +6518,20 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #12
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6345,15 +6543,21 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6368,14 +6572,20 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #12
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6387,15 +6597,21 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6410,14 +6626,20 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #24
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6429,15 +6651,21 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6452,14 +6680,20 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6471,15 +6705,21 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6494,14 +6734,22 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6513,14 +6761,22 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6535,14 +6791,22 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6554,14 +6818,22 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6576,14 +6848,22 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6595,15 +6875,23 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6618,14 +6906,22 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6637,15 +6933,23 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6660,14 +6964,22 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -6679,15 +6991,23 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -6702,14 +7022,22 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -6721,15 +7049,23 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -6744,14 +7080,22 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -6763,15 +7107,23 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -6786,14 +7138,22 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -6805,15 +7165,23 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -6828,14 +7196,22 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -6847,15 +7223,23 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -6870,14 +7254,22 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -6889,15 +7281,23 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -6912,14 +7312,22 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -6931,15 +7339,23 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -6954,14 +7370,22 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -6973,15 +7397,23 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: str x0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -6996,13 +7428,17 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -7013,13 +7449,17 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -7033,13 +7473,17 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7050,13 +7494,17 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n
define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7070,13 +7518,17 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7088,6 +7540,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
; CHECK-SD-LABEL: test_v8i16_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7095,6 +7549,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7108,13 +7564,17 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7126,6 +7586,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
; CHECK-SD-LABEL: test_v4i16_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7133,6 +7595,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7146,13 +7610,17 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7164,6 +7632,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
; CHECK-SD-LABEL: test_v4i32_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7171,6 +7641,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7184,13 +7656,17 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7202,6 +7678,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
; CHECK-SD-LABEL: test_v2i32_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7209,6 +7687,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7222,13 +7702,17 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7240,6 +7724,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
; CHECK-SD-LABEL: test_v2i64_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7247,6 +7733,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7260,13 +7748,17 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7278,6 +7770,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
; CHECK-SD-LABEL: test_v1i64_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7285,6 +7779,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7298,13 +7794,17 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7316,6 +7816,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
; CHECK-SD-LABEL: test_v4f32_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7323,6 +7825,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7336,13 +7840,17 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr)
define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7354,6 +7862,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
; CHECK-SD-LABEL: test_v2f32_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7361,6 +7871,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7374,13 +7886,17 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr)
define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7392,6 +7908,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
; CHECK-SD-LABEL: test_v2f64_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7399,6 +7917,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7412,13 +7932,17 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr)
define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7430,6 +7954,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
; CHECK-SD-LABEL: test_v1f64_post_reg_st2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -7437,6 +7963,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7450,13 +7978,19 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr)
define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -7467,13 +8001,19 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -7487,13 +8027,19 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt
define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -7504,13 +8050,19 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -7524,13 +8076,19 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -7541,14 +8099,20 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -7562,13 +8126,19 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt
define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -7579,14 +8149,20 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -7600,13 +8176,19 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt
define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -7617,14 +8199,20 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -7638,13 +8226,19 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt
define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -7655,14 +8249,20 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -7676,13 +8276,19 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt
define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -7693,14 +8299,20 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -7714,13 +8326,19 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt
define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -7731,14 +8349,20 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -7752,13 +8376,19 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt
define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -7769,14 +8399,20 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -7790,13 +8426,19 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -7807,14 +8449,20 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -7828,13 +8476,19 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -7845,14 +8499,20 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -7866,13 +8526,19 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do
define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -7883,14 +8549,20 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -7904,13 +8576,21 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do
define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -7921,13 +8601,21 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -7941,13 +8629,21 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1
define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -7958,13 +8654,21 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -7978,13 +8682,21 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i
define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -7995,14 +8707,22 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -8016,13 +8736,21 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8
define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8033,14 +8761,22 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8054,13 +8790,21 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4
define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8071,14 +8815,22 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8092,13 +8844,21 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4
define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8109,14 +8869,22 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8130,13 +8898,21 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2
define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8147,14 +8923,22 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8168,13 +8952,21 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2
define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -8185,14 +8977,22 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -8206,13 +9006,21 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1
define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -8223,14 +9031,22 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -8244,13 +9060,21 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -8261,14 +9085,22 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -8282,13 +9114,21 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -8299,14 +9139,22 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -8320,13 +9168,21 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do
define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -8337,14 +9193,22 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -8358,13 +9222,17 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do
define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -8375,13 +9243,17 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -8395,13 +9267,17 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -8412,13 +9288,17 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C)
define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -8432,13 +9312,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -8450,6 +9334,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8457,6 +9343,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -8470,13 +9358,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -8488,6 +9380,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8495,6 +9389,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -8508,13 +9404,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -8526,6 +9426,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8533,6 +9435,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -8546,13 +9450,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -8564,6 +9472,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8571,6 +9481,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -8584,13 +9496,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -8602,6 +9518,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8609,6 +9527,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -8622,13 +9542,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -8640,6 +9564,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8647,6 +9573,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -8660,13 +9588,17 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -8678,6 +9610,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8685,6 +9619,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -8698,13 +9634,17 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr)
define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -8716,6 +9656,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8723,6 +9665,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -8736,13 +9680,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr)
define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -8754,6 +9702,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8761,6 +9711,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -8774,13 +9726,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr)
define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -8792,6 +9748,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -8799,6 +9757,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -8812,13 +9772,19 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr)
define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8829,13 +9795,19 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8849,13 +9821,19 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8866,13 +9844,19 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8886,13 +9870,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8903,14 +9893,20 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8924,13 +9920,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8941,14 +9943,20 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8962,13 +9970,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,
define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -8979,14 +9993,20 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -9000,13 +10020,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,
define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -9017,14 +10043,20 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -9038,13 +10070,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -9055,14 +10093,20 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -9076,13 +10120,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,
define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -9093,14 +10143,20 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -9114,13 +10170,19 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,
define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -9131,14 +10193,20 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -9152,13 +10220,19 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -9169,14 +10243,20 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -9190,13 +10270,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #48
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -9207,14 +10293,20 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -9228,13 +10320,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x
define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -9245,14 +10343,20 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -9266,13 +10370,21 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x
define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -9283,13 +10395,21 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -9303,13 +10423,21 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -9320,13 +10448,21 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -9340,13 +10476,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x
define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -9357,14 +10501,22 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -9378,13 +10530,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -9395,14 +10555,22 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -9416,13 +10584,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<
define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -9433,14 +10609,22 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -9454,13 +10638,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<
define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -9471,14 +10663,22 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -9492,13 +10692,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -9509,14 +10717,22 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -9530,13 +10746,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<
define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -9547,14 +10771,22 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -9568,13 +10800,21 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<
define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9585,14 +10825,22 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9606,13 +10854,21 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9623,14 +10879,22 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9644,13 +10908,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #64
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9661,14 +10933,22 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9682,13 +10962,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x
define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9699,14 +10987,22 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9719,13 +11015,17 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x
define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -9736,13 +11036,17 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -9756,13 +11060,17 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr)
define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -9773,13 +11081,17 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -9793,13 +11105,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr)
define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #4
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -9811,6 +11127,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -9818,6 +11136,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -9831,13 +11151,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr)
define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #4
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -9849,6 +11173,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -9856,6 +11182,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -9869,13 +11197,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr)
define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -9887,6 +11219,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -9894,6 +11228,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -9907,13 +11243,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -9925,6 +11265,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -9932,6 +11274,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -9945,13 +11289,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr)
define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -9963,6 +11311,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -9970,6 +11320,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -9983,13 +11335,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -10001,6 +11357,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -10008,6 +11366,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -10021,13 +11381,17 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr)
define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -10039,6 +11403,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -10046,6 +11412,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -10059,13 +11427,17 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64,
define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -10077,6 +11449,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -10084,6 +11458,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -10097,13 +11473,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64,
define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -10115,6 +11495,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -10122,6 +11504,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -10135,13 +11519,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64
define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -10153,6 +11541,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
@@ -10160,6 +11550,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -10173,13 +11565,19 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64
define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -10190,13 +11588,19 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -10210,13 +11614,19 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -10227,13 +11637,19 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -10247,13 +11663,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6
define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #6
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -10264,14 +11686,20 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -10285,13 +11713,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #6
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -10302,14 +11736,20 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -10323,13 +11763,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #12
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -10340,14 +11786,20 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -10361,13 +11813,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #12
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -10378,14 +11836,20 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -10399,13 +11863,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -10416,14 +11886,20 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -10437,13 +11913,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -10454,14 +11936,20 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -10475,13 +11963,19 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #12
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -10492,14 +11986,20 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -10513,13 +12013,19 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #12
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -10530,14 +12036,20 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -10551,13 +12063,19 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -10568,14 +12086,20 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -10589,13 +12113,19 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2
define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #24
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -10606,14 +12136,20 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -10627,13 +12163,21 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1
define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #4
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -10644,13 +12188,21 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -10664,13 +12216,21 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #4
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -10681,13 +12241,21 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -10701,13 +12269,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8
define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -10718,14 +12294,22 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -10739,13 +12323,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #8
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -10756,14 +12348,22 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #1
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -10777,13 +12377,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -10794,14 +12402,22 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -10815,13 +12431,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -10832,14 +12456,22 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -10853,13 +12485,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -10870,14 +12510,22 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -10891,13 +12539,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -10908,14 +12564,22 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -10929,13 +12593,21 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -10946,14 +12618,22 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -10967,13 +12647,21 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #16
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -10984,14 +12672,22 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #2
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -11005,13 +12701,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -11022,14 +12726,22 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -11043,13 +12755,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2
define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, #32
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -11060,14 +12780,22 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane:
; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: lsl x8, x2, #3
+; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane:
; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: mov x8, x0
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT: ret
call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index c9d94f945f7af..54b96520dce41 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -351,30 +351,63 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld2lane_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ld2.b { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld2lane_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.b { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
ret %struct.__neon_int8x16x2_t %tmp2
}
define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld3lane_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld3lane_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
ret %struct.__neon_int8x16x3_t %tmp2
}
define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld4lane_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld4lane_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
ret %struct.__neon_int8x16x4_t %tmp2
}
@@ -385,30 +418,63 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>
define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld2lane_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ld2.h { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld2lane_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.h { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
ret %struct.__neon_int16x8x2_t %tmp2
}
define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld3lane_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld3lane_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
ret %struct.__neon_int16x8x3_t %tmp2
}
define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld4lane_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld4lane_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
ret %struct.__neon_int16x8x4_t %tmp2
}
@@ -419,30 +485,63 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>
define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld2lane_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ld2.s { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld2lane_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.s { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
ret %struct.__neon_int32x4x2_t %tmp2
}
define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld3lane_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld3lane_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
ret %struct.__neon_int32x4x3_t %tmp2
}
define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld4lane_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld4lane_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
ret %struct.__neon_int32x4x4_t %tmp2
}
@@ -453,30 +552,63 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>
define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld2lane_2d:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ld2.d { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld2lane_2d:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ld2.d { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
ret %struct.__neon_int64x2x2_t %tmp2
}
define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld3lane_2d:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld3lane_2d:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
ret %struct.__neon_int64x2x3_t %tmp2
}
define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ld4lane_2d:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ld4lane_2d:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
ret %struct.__neon_int64x2x4_t %tmp2
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index ad4b0f377627d..43d5ab5ab54e1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1780,7 +1780,9 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI126_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
entry:
@@ -1797,9 +1799,11 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: adrp x8, .LCPI127_0
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov b2, v0.b[1]
; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: adrp x8, .LCPI127_0
; CHECK-GI-NEXT: mov b4, v0.b[3]
; CHECK-GI-NEXT: mov b5, v0.b[4]
; CHECK-GI-NEXT: mov b6, v0.b[5]
@@ -1999,7 +2003,9 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI130_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
entry:
@@ -2016,9 +2022,11 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: adrp x8, .LCPI131_0
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov h2, v0.h[1]
; CHECK-GI-NEXT: mov h3, v0.h[2]
-; CHECK-GI-NEXT: adrp x8, .LCPI131_0
; CHECK-GI-NEXT: mov h4, v0.h[3]
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0]
@@ -2138,7 +2146,9 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI134_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
entry:
@@ -2155,8 +2165,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
index d04bac78377bf..6327679756739 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
@@ -7,11 +7,12 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.4s, #2
; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x0]
; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x1]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1
; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, ptr %a)
@@ -24,16 +25,17 @@ entry:
define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QTriple:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1
; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: mov v3.16b, v1.16b
-; CHECK-NEXT: mov v1.16b, v31.16b
-; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x0]
; CHECK-NEXT: mov v2.16b, v31.16b
; CHECK-NEXT: mov v3.16b, v0.16b
-; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x1]
-; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov v4.16b, v1.16b
+; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x0]
+; CHECK-NEXT: mov v3.16b, v31.16b
+; CHECK-NEXT: mov v4.16b, v0.16b
+; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x1]
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
@@ -46,19 +48,20 @@ entry:
define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QQuad:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1_q2
; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: mov v4.16b, v2.16b
-; CHECK-NEXT: mov v3.16b, v1.16b
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: mov v1.16b, v31.16b
-; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0]
-; CHECK-NEXT: mov v2.16b, v31.16b
-; CHECK-NEXT: mov v3.16b, v0.16b
+; CHECK-NEXT: mov v3.16b, v31.16b
; CHECK-NEXT: mov v4.16b, v0.16b
-; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1]
-; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov v5.16b, v1.16b
+; CHECK-NEXT: mov v6.16b, v2.16b
+; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0]
+; CHECK-NEXT: mov v4.16b, v31.16b
+; CHECK-NEXT: mov v5.16b, v0.16b
+; CHECK-NEXT: mov v6.16b, v0.16b
+; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1]
+; CHECK-NEXT: mov v0.16b, v3.16b
; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index 2044a866b830a..44b92e6ccd088 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -21,55 +21,121 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
}
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK-LABEL: tbl2_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK-LABEL: tbl2_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbl3_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbl3_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbl4_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbl4_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
@@ -107,7 +173,11 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
; CHECK-SD-NEXT: mov.s v0[1], v1[1]
@@ -117,7 +187,11 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI8_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
@@ -188,15 +262,23 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI9_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI9_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
@@ -249,7 +331,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s4, w0
; CHECK-SD-NEXT: mov w8, #32 // =0x20
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[1], w0
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[2], w0
; CHECK-SD-NEXT: mov.b v4[3], w0
; CHECK-SD-NEXT: mov.b v4[4], w0
@@ -278,6 +364,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
@@ -364,7 +454,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: fmov s4, w8
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[1], w8
; CHECK-SD-NEXT: mov.b v4[2], w8
; CHECK-SD-NEXT: mov.b v4[3], w8
@@ -395,8 +489,12 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov s6, w0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
@@ -502,7 +600,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff
; CHECK-SD-NEXT: adrp x8, .LCPI12_0
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0]
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT: mov.b v4[0], w0
; CHECK-SD-NEXT: mov.b v4[1], w0
@@ -521,6 +623,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
@@ -643,6 +749,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: dup.16b v4, w0
; CHECK-SD-NEXT: mov w8, #255 // =0xff
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: mov.b v4[8], w8
; CHECK-SD-NEXT: mov.b v4[9], w8
; CHECK-SD-NEXT: mov.b v4[10], w8
@@ -662,8 +772,12 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: fmov s6, w8
; CHECK-GI-NEXT: adrp x8, .LCPI13_1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
@@ -765,15 +879,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI14_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
@@ -859,16 +981,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI15_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI15_2
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2]
; CHECK-GI-NEXT: adrp x8, .LCPI15_1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
@@ -955,16 +1085,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8>
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI16_2
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
@@ -1006,55 +1144,121 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
}
define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbx2_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx2_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx2_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbx2_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx2_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx2_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbx3_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx3_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx3_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbx3_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx3_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx3_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
-; CHECK-LABEL: tbx4_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx4_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx4_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
-; CHECK-LABEL: tbx4_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbx4_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbx4_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
ret <16 x i8> %tmp3
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index fd862dfcbd693..9955b253f563e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -359,18 +359,20 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine_v8i16_8first:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d31, d1
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI25_0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
-; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1
+; CHECK-SD-NEXT: fmov d2, d0
+; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
+; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_v8i16_8first:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT: adrp x8, .LCPI25_0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI25_0]
-; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0
+; CHECK-GI-NEXT: fmov d31, d1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <16 x i8> %3
@@ -381,18 +383,20 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine_v8i16_8firstundef:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d31, d1
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
-; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1
+; CHECK-SD-NEXT: fmov d2, d0
+; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_v8i16_8firstundef:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI26_0]
-; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0
+; CHECK-GI-NEXT: fmov d31, d1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
ret <16 x i8> %3
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
index 66f3c5c93fcbf..98033a8e449ff 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
@@ -103,8 +103,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) {
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
; LSE-NEXT: mov x7, x5
; LSE-NEXT: mov x6, x4
-; LSE-NEXT: mov x4, x6
; LSE-NEXT: mov x5, x7
+; LSE-NEXT: mov x4, x6
; LSE-NEXT: caspal x4, x5, x2, x3, [x0]
; LSE-NEXT: cmp x5, x7
; LSE-NEXT: ccmp x4, x6, #0, eq
diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
index f10b7282669ae..d59de3c56f4ee 100644
--- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
@@ -224,11 +224,14 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step0_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI16_0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: mov v3.16b, v2.16b
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI16_1
-; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
-; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
%s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -239,11 +242,14 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step1_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI17_0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: mov v3.16b, v2.16b
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI17_1
-; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1]
-; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
%s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
@@ -254,11 +260,14 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step2_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI18_0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: mov v3.16b, v2.16b
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI18_1
-; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
%s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
index dbbfbea9176f6..5cfa59a302239 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
@@ -78,9 +78,9 @@ entry:
define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test5:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ld1r { v1.16b }, [x1]
-; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
index eae724870fb9d..039025dafa0d6 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
@@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> %
; CHECK-NEXT: fmul v17.2s, v6.2s, v5.2s
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s
-; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0
; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s
+; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: fneg v16.2s, v5.2s
; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #90
@@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p
; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0]
; CHECK-NEXT: ld2 { v2.2s, v3.2s }, [x1]
; CHECK-NEXT: fmul v4.2s, v3.2s, v1.2s
-; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s
+; CHECK-NEXT: fmul v6.2s, v2.2s, v1.2s
; CHECK-NEXT: fneg v4.2s, v4.2s
-; CHECK-NEXT: fmla v1.2s, v0.2s, v3.2s
+; CHECK-NEXT: fmla v6.2s, v0.2s, v3.2s
; CHECK-NEXT: fmla v4.2s, v0.2s, v2.2s
; CHECK-NEXT: str d4, [x4]
; CHECK-NEXT: ldr q5, [x2]
-; CHECK-NEXT: ext v2.16b, v5.16b, v5.16b, #8
-; CHECK-NEXT: zip1 v0.2s, v5.2s, v2.2s
-; CHECK-NEXT: zip2 v2.2s, v5.2s, v2.2s
-; CHECK-NEXT: fmul v3.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s
-; CHECK-NEXT: fmla v3.2s, v4.2s, v2.2s
-; CHECK-NEXT: fneg v2.2s, v1.2s
+; CHECK-NEXT: ext v7.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT: zip1 v0.2s, v5.2s, v7.2s
+; CHECK-NEXT: zip2 v1.2s, v5.2s, v7.2s
+; CHECK-NEXT: fmul v3.2s, v0.2s, v6.2s
+; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s
+; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s
+; CHECK-NEXT: fneg v2.2s, v6.2s
; CHECK-NEXT: fmla v2.2s, v4.2s, v0.2s
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5]
@@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, <
; CHECK-NEXT: zip1 v3.2s, v3.2s, v17.2s
; CHECK-NEXT: fmul v18.2s, v6.2s, v7.2s
; CHECK-NEXT: fmul v5.2s, v19.2s, v16.2s
-; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s
; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s
+; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s
; CHECK-NEXT: fneg v4.2s, v18.2s
; CHECK-NEXT: fmla v5.2s, v3.2s, v2.2s
-; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s
; CHECK-NEXT: fneg v2.2s, v16.2s
+; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s
; CHECK-NEXT: fmla v4.2s, v1.2s, v0.2s
-; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s
; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s
+; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s
; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s
; CHECK-NEXT: str d4, [x0]
+; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s
; CHECK-NEXT: fneg v16.2s, v0.2s
; CHECK-NEXT: zip1 v0.4s, v2.4s, v5.4s
-; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s
; CHECK-NEXT: fmla v16.2s, v2.2s, v4.2s
; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index c8dc092bb05e4..0481d997d24fa 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -920,8 +920,10 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov x9, sp
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0x3
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 4253b06e1f1ac..0a3b9a070c2b3 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: ldp q2, q1, [x9]
-; CHECK-NEXT: fcvtzu.4s v3, v1
-; CHECK-NEXT: fcvtzu.4s v2, v2
-; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v0
+; CHECK-NEXT: fcvtzu.4s v4, v1
+; CHECK-NEXT: fcvtzu.4s v3, v2
+; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0
; CHECK-NEXT: str d1, [x1], #16
; CHECK-NEXT: b.eq LBB0_1
; CHECK-NEXT: ; %bb.2: ; %exit
@@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: ldp q2, q1, [x9, #32]
-; CHECK-NEXT: fcvtzu.4s v5, v1
+; CHECK-NEXT: fcvtzu.4s v7, v1
; CHECK-NEXT: ldp q1, q3, [x9]
-; CHECK-NEXT: fcvtzu.4s v4, v2
-; CHECK-NEXT: fcvtzu.4s v3, v3
-; CHECK-NEXT: fcvtzu.4s v2, v1
-; CHECK-NEXT: tbl.16b v1, { v2, v3, v4, v5 }, v0
+; CHECK-NEXT: fcvtzu.4s v6, v2
+; CHECK-NEXT: fcvtzu.4s v5, v3
+; CHECK-NEXT: fcvtzu.4s v4, v1
+; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0
; CHECK-NEXT: str q1, [x1], #32
; CHECK-NEXT: b.eq LBB4_1
; CHECK-NEXT: ; %bb.2: ; %exit
@@ -316,20 +316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
; CHECK-NEXT: ldp q3, q4, [x9, #32]
; CHECK-NEXT: ldp q5, q6, [x10]
; CHECK-NEXT: fcvtzu.4s v19, v1
-; CHECK-NEXT: ldp q7, q1, [x9]
-; CHECK-NEXT: fcvtzu.4s v4, v4
; CHECK-NEXT: fcvtzu.4s v18, v2
-; CHECK-NEXT: fcvtzu.4s v3, v3
+; CHECK-NEXT: ldp q2, q1, [x9]
+; CHECK-NEXT: fcvtzu.4s v23, v4
; CHECK-NEXT: fcvtzu.4s v17, v6
-; CHECK-NEXT: fcvtzu.4s v16, v5
; CHECK-NEXT: add x9, x2, x8, lsl #5
-; CHECK-NEXT: fcvtzu.4s v2, v1
-; CHECK-NEXT: fcvtzu.4s v1, v7
+; CHECK-NEXT: fcvtzu.4s v22, v3
+; CHECK-NEXT: fcvtzu.4s v16, v5
; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: fcvtzu.4s v21, v1
; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: tbl.16b v5, { v16, v17, v18, v19 }, v0
-; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
-; CHECK-NEXT: stp q1, q5, [x9]
+; CHECK-NEXT: fcvtzu.4s v20, v2
+; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0
+; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0
+; CHECK-NEXT: stp q2, q1, [x9]
; CHECK-NEXT: b.eq LBB5_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index e38394f2b0533..3b8054a635bcd 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -1483,12 +1483,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) {
; CHECK-SD-NEXT: adrp x8, .LCPI70_0
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI70_0]
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT: xtn v6.2s, v3.2d
+; CHECK-SD-NEXT: xtn v5.2s, v2.2d
+; CHECK-SD-NEXT: xtn v4.2s, v1.2d
+; CHECK-SD-NEXT: xtn v3.2s, v0.2d
+; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI70_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v8f64_v8i16:
@@ -1514,12 +1514,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) {
; CHECK-SD-NEXT: adrp x8, .LCPI71_0
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI71_0]
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT: xtn v6.2s, v3.2d
+; CHECK-SD-NEXT: xtn v5.2s, v2.2d
+; CHECK-SD-NEXT: xtn v4.2s, v1.2d
+; CHECK-SD-NEXT: xtn v3.2s, v0.2d
+; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI71_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v8f64_v8i16:
@@ -1545,21 +1545,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) {
; CHECK-SD-NEXT: adrp x8, .LCPI72_0
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI72_0]
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v7.2s, v7.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: xtn v6.2s, v6.2d
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v5.2s, v5.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT: xtn v19.2s, v3.2d
+; CHECK-SD-NEXT: xtn v23.2s, v7.2d
+; CHECK-SD-NEXT: xtn v18.2s, v2.2d
+; CHECK-SD-NEXT: xtn v22.2s, v6.2d
+; CHECK-SD-NEXT: xtn v17.2s, v1.2d
+; CHECK-SD-NEXT: xtn v21.2s, v5.2d
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI72_0]
+; CHECK-SD-NEXT: xtn v16.2s, v0.2d
+; CHECK-SD-NEXT: xtn v20.2s, v4.2d
+; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v16f64_v16i16:
@@ -1592,21 +1592,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) {
; CHECK-SD-NEXT: adrp x8, .LCPI73_0
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI73_0]
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v7.2s, v7.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: xtn v6.2s, v6.2d
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v5.2s, v5.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT: xtn v19.2s, v3.2d
+; CHECK-SD-NEXT: xtn v23.2s, v7.2d
+; CHECK-SD-NEXT: xtn v18.2s, v2.2d
+; CHECK-SD-NEXT: xtn v22.2s, v6.2d
+; CHECK-SD-NEXT: xtn v17.2s, v1.2d
+; CHECK-SD-NEXT: xtn v21.2s, v5.2d
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI73_0]
+; CHECK-SD-NEXT: xtn v16.2s, v0.2d
+; CHECK-SD-NEXT: xtn v20.2s, v4.2d
+; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v16f64_v16i16:
@@ -1634,48 +1634,65 @@ entry:
define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) {
; CHECK-SD-LABEL: fptos_v32f64_v32i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldp q16, q17, [sp, #64]
+; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset b8, -8
+; CHECK-SD-NEXT: .cfi_offset b9, -16
+; CHECK-SD-NEXT: .cfi_offset b10, -24
+; CHECK-SD-NEXT: .cfi_offset b11, -32
+; CHECK-SD-NEXT: .cfi_offset b12, -40
+; CHECK-SD-NEXT: .cfi_offset b13, -48
+; CHECK-SD-NEXT: .cfi_offset b14, -56
+; CHECK-SD-NEXT: .cfi_offset b15, -64
; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT: ldp q18, q19, [sp, #96]
-; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-SD-NEXT: ldp q20, q21, [sp, #32]
-; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-SD-NEXT: ldp q22, q23, [sp]
-; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT: adrp x8, .LCPI74_0
+; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT: ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT: ldp q23, q24, [sp, #96]
; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT: ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d
-; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT: xtn v2.2s, v18.2d
+; CHECK-SD-NEXT: ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT: xtn v1.2s, v19.2d
+; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d
; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT: xtn v0.2s, v22.2d
+; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT: xtn v29.2s, v7.2d
+; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d
; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: adrp x8, .LCPI74_0
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: xtn v7.2s, v7.2d
-; CHECK-SD-NEXT: xtn v6.2s, v6.2d
-; CHECK-SD-NEXT: xtn v21.2s, v21.2d
-; CHECK-SD-NEXT: xtn v25.2s, v19.2d
-; CHECK-SD-NEXT: xtn v5.2s, v5.2d
-; CHECK-SD-NEXT: xtn v20.2s, v20.2d
-; CHECK-SD-NEXT: xtn v24.2s, v18.2d
-; CHECK-SD-NEXT: xtn v19.2s, v23.2d
-; CHECK-SD-NEXT: xtn v23.2s, v17.2d
-; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: xtn v18.2s, v22.2d
-; CHECK-SD-NEXT: xtn v22.2s, v16.2d
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI74_0]
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
+; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT: xtn v15.2s, v21.2d
+; CHECK-SD-NEXT: xtn v11.2s, v19.2d
+; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT: xtn v14.2s, v20.2d
+; CHECK-SD-NEXT: xtn v10.2s, v22.2d
+; CHECK-SD-NEXT: xtn v13.2s, v17.2d
+; CHECK-SD-NEXT: xtn v9.2s, v7.2d
+; CHECK-SD-NEXT: xtn v28.2s, v6.2d
+; CHECK-SD-NEXT: xtn v8.2s, v18.2d
+; CHECK-SD-NEXT: xtn v12.2s, v16.2d
+; CHECK-SD-NEXT: xtn v27.2s, v5.2d
+; CHECK-SD-NEXT: xtn v26.2s, v4.2d
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI74_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v32f64_v32i16:
@@ -1721,48 +1738,65 @@ entry:
define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) {
; CHECK-SD-LABEL: fptou_v32f64_v32i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldp q16, q17, [sp, #64]
+; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset b8, -8
+; CHECK-SD-NEXT: .cfi_offset b9, -16
+; CHECK-SD-NEXT: .cfi_offset b10, -24
+; CHECK-SD-NEXT: .cfi_offset b11, -32
+; CHECK-SD-NEXT: .cfi_offset b12, -40
+; CHECK-SD-NEXT: .cfi_offset b13, -48
+; CHECK-SD-NEXT: .cfi_offset b14, -56
+; CHECK-SD-NEXT: .cfi_offset b15, -64
; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT: ldp q18, q19, [sp, #96]
-; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-SD-NEXT: ldp q20, q21, [sp, #32]
-; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-SD-NEXT: ldp q22, q23, [sp]
-; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT: adrp x8, .LCPI75_0
+; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT: ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT: ldp q23, q24, [sp, #96]
; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT: ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d
-; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT: xtn v2.2s, v18.2d
+; CHECK-SD-NEXT: ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT: xtn v1.2s, v19.2d
+; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d
; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT: xtn v0.2s, v22.2d
+; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT: xtn v29.2s, v7.2d
+; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d
; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT: xtn v3.2s, v3.2d
-; CHECK-SD-NEXT: xtn v2.2s, v2.2d
-; CHECK-SD-NEXT: adrp x8, .LCPI75_0
-; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: xtn v0.2s, v0.2d
-; CHECK-SD-NEXT: xtn v7.2s, v7.2d
-; CHECK-SD-NEXT: xtn v6.2s, v6.2d
-; CHECK-SD-NEXT: xtn v21.2s, v21.2d
-; CHECK-SD-NEXT: xtn v25.2s, v19.2d
-; CHECK-SD-NEXT: xtn v5.2s, v5.2d
-; CHECK-SD-NEXT: xtn v20.2s, v20.2d
-; CHECK-SD-NEXT: xtn v24.2s, v18.2d
-; CHECK-SD-NEXT: xtn v19.2s, v23.2d
-; CHECK-SD-NEXT: xtn v23.2s, v17.2d
-; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: xtn v18.2s, v22.2d
-; CHECK-SD-NEXT: xtn v22.2s, v16.2d
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI75_0]
-; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
-; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
+; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT: xtn v15.2s, v21.2d
+; CHECK-SD-NEXT: xtn v11.2s, v19.2d
+; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT: xtn v14.2s, v20.2d
+; CHECK-SD-NEXT: xtn v10.2s, v22.2d
+; CHECK-SD-NEXT: xtn v13.2s, v17.2d
+; CHECK-SD-NEXT: xtn v9.2s, v7.2d
+; CHECK-SD-NEXT: xtn v28.2s, v6.2d
+; CHECK-SD-NEXT: xtn v8.2s, v18.2d
+; CHECK-SD-NEXT: xtn v12.2s, v16.2d
+; CHECK-SD-NEXT: xtn v27.2s, v5.2d
+; CHECK-SD-NEXT: xtn v26.2s, v4.2d
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI75_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v32f64_v32i16:
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 2d0931fb4f525..d620a8851ee44 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -3365,111 +3365,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: mov v0.s[1], w11
; CHECK-NEXT: csel w12, w13, w8, lt
+; CHECK-NEXT: mov v0.s[1], w11
; CHECK-NEXT: fcvtzs w11, d1
; CHECK-NEXT: cmn w12, #128
; CHECK-NEXT: csel w12, w12, w9, gt
; CHECK-NEXT: fmov s1, w12
; CHECK-NEXT: fcvtzs w12, d2
; CHECK-NEXT: mov d2, v3.d[1]
-; CHECK-NEXT: mov w13, v0.s[1]
; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: mov w13, v0.s[1]
; CHECK-NEXT: mov v1.s[1], w10
; CHECK-NEXT: csel w10, w11, w8, lt
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: fcvtzs w11, d2
-; CHECK-NEXT: mov d2, v4.d[1]
-; CHECK-NEXT: mov v0.b[1], w13
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: mov v0.b[1], w13
; CHECK-NEXT: csel w12, w12, w8, lt
; CHECK-NEXT: cmn w12, #128
; CHECK-NEXT: mov w13, v1.s[1]
; CHECK-NEXT: csel w12, w12, w9, gt
; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: mov v0.b[2], v1.b[0]
-; CHECK-NEXT: fmov s1, w12
+; CHECK-NEXT: fmov s2, w12
; CHECK-NEXT: fcvtzs w12, d3
-; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: mov d3, v4.d[1]
+; CHECK-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-NEXT: mov v2.s[1], w10
; CHECK-NEXT: csel w10, w11, w8, lt
-; CHECK-NEXT: fcvtzs w11, d2
; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: mov v0.b[3], w13
-; CHECK-NEXT: mov d2, v5.d[1]
+; CHECK-NEXT: fcvtzs w11, d3
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: mov v0.b[3], w13
; CHECK-NEXT: csel w12, w12, w8, lt
; CHECK-NEXT: cmn w12, #128
-; CHECK-NEXT: mov w13, v1.s[1]
+; CHECK-NEXT: mov w13, v2.s[1]
; CHECK-NEXT: csel w12, w12, w9, gt
-; CHECK-NEXT: mov v0.b[4], v1.b[0]
; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: fmov s1, w12
+; CHECK-NEXT: fmov s3, w12
; CHECK-NEXT: fcvtzs w12, d4
-; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: mov v0.b[4], v2.b[0]
+; CHECK-NEXT: mov d4, v5.d[1]
+; CHECK-NEXT: mov v3.s[1], w10
; CHECK-NEXT: csel w10, w11, w8, lt
-; CHECK-NEXT: mov v0.b[5], w13
; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: fcvtzs w11, d2
-; CHECK-NEXT: mov d2, v6.d[1]
+; CHECK-NEXT: mov v0.b[5], w13
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: fcvtzs w11, d4
; CHECK-NEXT: csel w12, w12, w8, lt
; CHECK-NEXT: cmn w12, #128
-; CHECK-NEXT: mov w13, v1.s[1]
-; CHECK-NEXT: mov v0.b[6], v1.b[0]
+; CHECK-NEXT: mov w13, v3.s[1]
; CHECK-NEXT: csel w12, w12, w9, gt
-; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: fmov s1, w12
+; CHECK-NEXT: mov v0.b[6], v3.b[0]
+; CHECK-NEXT: fmov s4, w12
; CHECK-NEXT: fcvtzs w12, d5
-; CHECK-NEXT: mov v0.b[7], w13
-; CHECK-NEXT: fcvtzs w13, d2
-; CHECK-NEXT: mov d2, v7.d[1]
-; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: mov d5, v6.d[1]
+; CHECK-NEXT: mov v4.s[1], w10
; CHECK-NEXT: csel w10, w11, w8, lt
+; CHECK-NEXT: mov v0.b[7], w13
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: fcvtzs w13, d5
; CHECK-NEXT: csel w11, w12, w8, lt
; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: mov w12, v1.s[1]
-; CHECK-NEXT: mov v0.b[8], v1.b[0]
+; CHECK-NEXT: mov w12, v4.s[1]
+; CHECK-NEXT: mov v0.b[8], v4.b[0]
; CHECK-NEXT: csel w11, w11, w9, gt
-; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: fmov s1, w11
+; CHECK-NEXT: fmov s5, w11
; CHECK-NEXT: fcvtzs w11, d6
+; CHECK-NEXT: cmp w13, #127
+; CHECK-NEXT: mov d6, v7.d[1]
; CHECK-NEXT: mov v0.b[9], w12
-; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: mov v5.s[1], w10
; CHECK-NEXT: csel w10, w13, w8, lt
-; CHECK-NEXT: fcvtzs w13, d2
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: fcvtzs w13, d6
; CHECK-NEXT: csel w11, w11, w8, lt
; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: mov v0.b[10], v1.b[0]
-; CHECK-NEXT: mov w12, v1.s[1]
+; CHECK-NEXT: mov v0.b[10], v5.b[0]
+; CHECK-NEXT: mov w12, v5.s[1]
; CHECK-NEXT: csel w11, w11, w9, gt
-; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: fmov s1, w11
+; CHECK-NEXT: fmov s6, w11
; CHECK-NEXT: fcvtzs w11, d7
+; CHECK-NEXT: cmp w13, #127
; CHECK-NEXT: mov v0.b[11], w12
-; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: mov v6.s[1], w10
; CHECK-NEXT: csel w10, w13, w8, lt
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w11, #127
; CHECK-NEXT: csel w8, w11, w8, lt
; CHECK-NEXT: cmn w8, #128
-; CHECK-NEXT: mov v0.b[12], v1.b[0]
-; CHECK-NEXT: mov w11, v1.s[1]
+; CHECK-NEXT: mov v0.b[12], v6.b[0]
+; CHECK-NEXT: mov w11, v6.s[1]
; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fmov s7, w8
; CHECK-NEXT: mov v0.b[13], w11
-; CHECK-NEXT: mov v1.s[1], w10
-; CHECK-NEXT: mov v0.b[14], v1.b[0]
-; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v7.s[1], w10
+; CHECK-NEXT: mov v0.b[14], v7.b[0]
+; CHECK-NEXT: mov w8, v7.s[1]
; CHECK-NEXT: mov v0.b[15], w8
; CHECK-NEXT: ret
%x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f)
@@ -3575,32 +3575,26 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
; CHECK-NEXT: cmp w13, w9
; CHECK-NEXT: csel w11, w13, w9, lt
; CHECK-NEXT: fcvtzs w13, d3
-; CHECK-NEXT: fmov s3, w12
; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768
; CHECK-NEXT: csel w11, w11, w8, gt
; CHECK-NEXT: cmp w14, w9
; CHECK-NEXT: csel w14, w14, w9, lt
-; CHECK-NEXT: mov v3.s[1], w10
; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768
; CHECK-NEXT: csel w14, w14, w8, gt
; CHECK-NEXT: cmp w13, w9
; CHECK-NEXT: csel w13, w13, w9, lt
-; CHECK-NEXT: fmov s2, w14
; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768
; CHECK-NEXT: csel w13, w13, w8, gt
; CHECK-NEXT: cmp w15, w9
; CHECK-NEXT: csel w15, w15, w9, lt
-; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768
; CHECK-NEXT: csel w16, w15, w8, gt
; CHECK-NEXT: cmp w17, w9
; CHECK-NEXT: csel w15, w17, w9, lt
-; CHECK-NEXT: fmov s1, w16
; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768
; CHECK-NEXT: csel w15, w15, w8, gt
; CHECK-NEXT: cmp w18, w9
; CHECK-NEXT: csel w17, w18, w9, lt
-; CHECK-NEXT: mov v1.s[1], w13
; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768
; CHECK-NEXT: csel w17, w17, w8, gt
; CHECK-NEXT: cmp w0, w9
@@ -3623,32 +3617,38 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
; CHECK-NEXT: cmp w2, w9
; CHECK-NEXT: fcvtzs w5, d0
; CHECK-NEXT: csel w2, w2, w9, lt
-; CHECK-NEXT: fmov s0, w17
+; CHECK-NEXT: fmov s3, w12
; CHECK-NEXT: mov v7.s[1], w18
; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768
; CHECK-NEXT: csel w2, w2, w8, gt
; CHECK-NEXT: cmp w3, w9
; CHECK-NEXT: csel w3, w3, w9, lt
+; CHECK-NEXT: mov v3.s[1], w10
; CHECK-NEXT: fmov s6, w2
-; CHECK-NEXT: mov v0.s[1], w15
; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768
+; CHECK-NEXT: fmov s2, w14
; CHECK-NEXT: csel w3, w3, w8, gt
; CHECK-NEXT: cmp w4, w9
; CHECK-NEXT: csel w4, w4, w9, lt
; CHECK-NEXT: mov v6.s[1], w0
; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768
+; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: csel w12, w4, w8, gt
; CHECK-NEXT: cmp w5, w9
+; CHECK-NEXT: fmov s1, w16
; CHECK-NEXT: csel w10, w5, w9, lt
; CHECK-NEXT: fmov s5, w12
; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
; CHECK-NEXT: csel w10, w10, w8, gt
; CHECK-NEXT: cmp w6, w9
+; CHECK-NEXT: mov v1.s[1], w13
; CHECK-NEXT: csel w9, w6, w9, lt
; CHECK-NEXT: mov v5.s[1], w3
+; CHECK-NEXT: fmov s0, w17
; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: fmov s4, w8
+; CHECK-NEXT: mov v0.s[1], w15
; CHECK-NEXT: adrp x8, .LCPI85_0
; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0]
; CHECK-NEXT: mov v4.s[1], w10
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 62f5e0fe2dcaa..16e04070b6543 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -2751,8 +2751,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
; CHECK-NEXT: fcvtzu w12, d2
; CHECK-NEXT: fcvtzu w14, d1
; CHECK-NEXT: fcvtzu w8, d4
-; CHECK-NEXT: fcvtzu w10, d5
; CHECK-NEXT: mov d4, v0.d[1]
+; CHECK-NEXT: fcvtzu w10, d5
; CHECK-NEXT: fcvtzu w13, d3
; CHECK-NEXT: cmp w8, #255
; CHECK-NEXT: fcvtzu w15, d4
@@ -2760,29 +2760,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w9, w9, w11, lo
; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: fmov s3, w9
+; CHECK-NEXT: fmov s4, w9
; CHECK-NEXT: csel w9, w10, w11, lo
; CHECK-NEXT: cmp w12, #255
; CHECK-NEXT: fcvtzu w10, d0
-; CHECK-NEXT: mov v3.s[1], w8
+; CHECK-NEXT: mov v4.s[1], w8
; CHECK-NEXT: csel w8, w12, w11, lo
; CHECK-NEXT: cmp w13, #255
-; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: csel w8, w13, w11, lo
; CHECK-NEXT: cmp w14, #255
-; CHECK-NEXT: mov v2.s[1], w9
+; CHECK-NEXT: mov v3.s[1], w9
; CHECK-NEXT: csel w9, w14, w11, lo
; CHECK-NEXT: cmp w15, #255
-; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: csel w9, w15, w11, lo
; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov v1.s[1], w8
+; CHECK-NEXT: mov v2.s[1], w8
; CHECK-NEXT: csel w8, w10, w11, lo
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: adrp x8, .LCPI82_0
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0]
+; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
; CHECK-NEXT: ret
%x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f)
ret <8 x i8> %x
@@ -2802,29 +2802,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
; CHECK-NEXT: csel w10, w10, w8, lo
; CHECK-NEXT: fmov s0, w10
; CHECK-NEXT: fcvtzu w10, d16
+; CHECK-NEXT: mov d16, v2.d[1]
; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: mov d1, v2.d[1]
; CHECK-NEXT: cmp w10, #255
; CHECK-NEXT: csel w10, w10, w8, lo
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: mov w11, v0.s[1]
; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: fmov s16, w9
-; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: mov d1, v3.d[1]
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: fcvtzu w9, d16
+; CHECK-NEXT: mov d16, v3.d[1]
; CHECK-NEXT: mov v0.b[1], w11
-; CHECK-NEXT: mov v16.s[1], w10
+; CHECK-NEXT: mov v1.s[1], w10
; CHECK-NEXT: fcvtzu w10, d2
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w9, w9, w8, lo
; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov w11, v16.s[1]
-; CHECK-NEXT: mov v0.b[2], v16.b[0]
+; CHECK-NEXT: mov w11, v1.s[1]
+; CHECK-NEXT: mov v0.b[2], v1.b[0]
; CHECK-NEXT: csel w10, w10, w8, lo
; CHECK-NEXT: fmov s2, w10
-; CHECK-NEXT: fcvtzu w10, d1
-; CHECK-NEXT: mov d1, v4.d[1]
+; CHECK-NEXT: fcvtzu w10, d16
+; CHECK-NEXT: mov d16, v4.d[1]
; CHECK-NEXT: mov v0.b[3], w11
; CHECK-NEXT: mov v2.s[1], w9
; CHECK-NEXT: fcvtzu w9, d3
@@ -2834,58 +2834,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
; CHECK-NEXT: mov w11, v2.s[1]
; CHECK-NEXT: mov v0.b[4], v2.b[0]
; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: mov d1, v5.d[1]
+; CHECK-NEXT: fmov s3, w9
+; CHECK-NEXT: fcvtzu w9, d16
+; CHECK-NEXT: mov d16, v5.d[1]
; CHECK-NEXT: mov v0.b[5], w11
-; CHECK-NEXT: mov v2.s[1], w10
+; CHECK-NEXT: mov v3.s[1], w10
; CHECK-NEXT: fcvtzu w10, d4
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w9, w9, w8, lo
; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov w11, v2.s[1]
-; CHECK-NEXT: mov v0.b[6], v2.b[0]
+; CHECK-NEXT: mov w11, v3.s[1]
+; CHECK-NEXT: mov v0.b[6], v3.b[0]
; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s2, w10
-; CHECK-NEXT: fcvtzu w10, d1
-; CHECK-NEXT: mov d1, v6.d[1]
+; CHECK-NEXT: fmov s4, w10
+; CHECK-NEXT: fcvtzu w10, d16
; CHECK-NEXT: mov v0.b[7], w11
-; CHECK-NEXT: mov v2.s[1], w9
+; CHECK-NEXT: mov v4.s[1], w9
; CHECK-NEXT: fcvtzu w9, d5
+; CHECK-NEXT: mov d5, v6.d[1]
; CHECK-NEXT: cmp w10, #255
; CHECK-NEXT: csel w10, w10, w8, lo
; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: mov w11, v2.s[1]
-; CHECK-NEXT: mov v0.b[8], v2.b[0]
+; CHECK-NEXT: mov w11, v4.s[1]
+; CHECK-NEXT: mov v0.b[8], v4.b[0]
; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: mov d1, v7.d[1]
+; CHECK-NEXT: fmov s16, w9
+; CHECK-NEXT: fcvtzu w9, d5
+; CHECK-NEXT: mov d5, v7.d[1]
; CHECK-NEXT: mov v0.b[9], w11
-; CHECK-NEXT: mov v2.s[1], w10
+; CHECK-NEXT: mov v16.s[1], w10
; CHECK-NEXT: fcvtzu w10, d6
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w9, w9, w8, lo
; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov v0.b[10], v2.b[0]
-; CHECK-NEXT: mov w11, v2.s[1]
+; CHECK-NEXT: mov v0.b[10], v16.b[0]
+; CHECK-NEXT: mov w11, v16.s[1]
; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s2, w10
+; CHECK-NEXT: fmov s6, w10
; CHECK-NEXT: fcvtzu w10, d7
; CHECK-NEXT: mov v0.b[11], w11
-; CHECK-NEXT: mov v2.s[1], w9
-; CHECK-NEXT: fcvtzu w9, d1
+; CHECK-NEXT: mov v6.s[1], w9
+; CHECK-NEXT: fcvtzu w9, d5
; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: mov v0.b[12], v2.b[0]
-; CHECK-NEXT: mov w11, v2.s[1]
+; CHECK-NEXT: mov v0.b[12], v6.b[0]
+; CHECK-NEXT: mov w11, v6.s[1]
; CHECK-NEXT: csel w9, w9, w8, lo
; CHECK-NEXT: cmp w10, #255
; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fmov s5, w8
; CHECK-NEXT: mov v0.b[13], w11
-; CHECK-NEXT: mov v1.s[1], w9
-; CHECK-NEXT: mov v0.b[14], v1.b[0]
-; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v5.s[1], w9
+; CHECK-NEXT: mov v0.b[14], v5.b[0]
+; CHECK-NEXT: mov w8, v5.s[1]
; CHECK-NEXT: mov v0.b[15], w8
; CHECK-NEXT: ret
%x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f)
@@ -2903,8 +2903,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
; CHECK-NEXT: fcvtzu w12, d2
; CHECK-NEXT: fcvtzu w14, d1
; CHECK-NEXT: fcvtzu w8, d4
-; CHECK-NEXT: fcvtzu w11, d5
; CHECK-NEXT: mov d4, v0.d[1]
+; CHECK-NEXT: fcvtzu w11, d5
; CHECK-NEXT: fcvtzu w13, d3
; CHECK-NEXT: cmp w8, w10
; CHECK-NEXT: fcvtzu w15, d4
@@ -2912,29 +2912,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: csel w9, w9, w10, lo
; CHECK-NEXT: cmp w11, w10
-; CHECK-NEXT: fmov s3, w9
+; CHECK-NEXT: fmov s4, w9
; CHECK-NEXT: csel w9, w11, w10, lo
; CHECK-NEXT: cmp w12, w10
; CHECK-NEXT: fcvtzu w11, d0
-; CHECK-NEXT: mov v3.s[1], w8
+; CHECK-NEXT: mov v4.s[1], w8
; CHECK-NEXT: csel w8, w12, w10, lo
; CHECK-NEXT: cmp w13, w10
-; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: csel w8, w13, w10, lo
; CHECK-NEXT: cmp w14, w10
-; CHECK-NEXT: mov v2.s[1], w9
+; CHECK-NEXT: mov v3.s[1], w9
; CHECK-NEXT: csel w9, w14, w10, lo
; CHECK-NEXT: cmp w15, w10
-; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: csel w9, w15, w10, lo
; CHECK-NEXT: cmp w11, w10
-; CHECK-NEXT: mov v1.s[1], w8
+; CHECK-NEXT: mov v2.s[1], w8
; CHECK-NEXT: csel w8, w11, w10, lo
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: adrp x8, .LCPI84_0
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0]
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0]
+; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
; CHECK-NEXT: ret
%x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f)
ret <8 x i16> %x
@@ -2973,53 +2973,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
; CHECK-NEXT: fcvtzu w16, d0
; CHECK-NEXT: csel w11, w11, w8, lo
; CHECK-NEXT: cmp w17, w8
-; CHECK-NEXT: fmov s18, w11
; CHECK-NEXT: mov v19.s[1], w13
; CHECK-NEXT: csel w13, w17, w8, lo
; CHECK-NEXT: cmp w10, w8
; CHECK-NEXT: csel w10, w10, w8, lo
; CHECK-NEXT: cmp w18, w8
-; CHECK-NEXT: fcvtzu w17, d2
+; CHECK-NEXT: fmov s18, w11
; CHECK-NEXT: csel w11, w18, w8, lo
; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: mov v18.s[1], w9
+; CHECK-NEXT: fcvtzu w17, d2
; CHECK-NEXT: csel w12, w12, w8, lo
; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: fmov s17, w10
+; CHECK-NEXT: fcvtzu w18, d6
+; CHECK-NEXT: mov v18.s[1], w9
; CHECK-NEXT: csel w9, w16, w8, lo
; CHECK-NEXT: cmp w14, w8
-; CHECK-NEXT: fcvtzu w16, d5
+; CHECK-NEXT: fmov s17, w10
; CHECK-NEXT: csel w10, w14, w8, lo
-; CHECK-NEXT: fcvtzu w18, d6
+; CHECK-NEXT: fcvtzu w16, d5
+; CHECK-NEXT: fmov s23, w10
; CHECK-NEXT: cmp w17, w8
-; CHECK-NEXT: fmov s5, w10
+; CHECK-NEXT: fcvtzu w14, d3
; CHECK-NEXT: csel w10, w17, w8, lo
; CHECK-NEXT: cmp w15, w8
-; CHECK-NEXT: fcvtzu w14, d3
; CHECK-NEXT: fcvtzu w17, d4
-; CHECK-NEXT: fmov s16, w12
; CHECK-NEXT: mov v17.s[1], w13
-; CHECK-NEXT: mov v5.s[1], w9
+; CHECK-NEXT: mov v23.s[1], w9
; CHECK-NEXT: csel w9, w15, w8, lo
; CHECK-NEXT: cmp w18, w8
-; CHECK-NEXT: fmov s4, w9
+; CHECK-NEXT: fmov s22, w9
; CHECK-NEXT: csel w9, w18, w8, lo
; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: mov v16.s[1], w11
-; CHECK-NEXT: mov v4.s[1], w10
+; CHECK-NEXT: fmov s16, w12
+; CHECK-NEXT: mov v22.s[1], w10
; CHECK-NEXT: csel w10, w16, w8, lo
; CHECK-NEXT: cmp w14, w8
-; CHECK-NEXT: fmov s3, w10
+; CHECK-NEXT: fmov s21, w10
; CHECK-NEXT: csel w10, w14, w8, lo
; CHECK-NEXT: cmp w17, w8
; CHECK-NEXT: csel w8, w17, w8, lo
-; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: mov v16.s[1], w11
+; CHECK-NEXT: mov v21.s[1], w9
+; CHECK-NEXT: fmov s20, w8
; CHECK-NEXT: adrp x8, .LCPI85_0
-; CHECK-NEXT: mov v3.s[1], w9
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-NEXT: mov v2.s[1], w10
+; CHECK-NEXT: mov v20.s[1], w10
; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
+; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
; CHECK-NEXT: ret
%x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f)
ret <16 x i16> %x
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll
index d664421086fef..6828fa9f1508c 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll
@@ -47,10 +47,11 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_15:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: mov v3.16b, v1.16b
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
%s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %s2
@@ -145,10 +146,11 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_15:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT: adrp x8, .LCPI13_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: mov v3.16b, v1.16b
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
%s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %s2
@@ -270,6 +272,7 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_15:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI24_0
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0]
@@ -490,6 +493,7 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_15:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI40_0
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0]
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 4907abc6e946e..50c0c8b11e751 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1349,14 +1349,18 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI92_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI92_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI92_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI92_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
@@ -1382,8 +1386,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) {
;
; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
; CHECK-GI-NEXT: adrp x8, .LCPI93_0
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI93_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
@@ -1417,8 +1422,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) {
;
; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v31.2d, #0000000000000000
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q31_q0
; CHECK-GI-NEXT: adrp x8, .LCPI94_0
+; CHECK-GI-NEXT: movi v31.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI94_0]
; CHECK-GI-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b
; CHECK-GI-NEXT: ret
@@ -1460,7 +1466,9 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI96_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI96_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
index 8c88d3c33e07c..3f590226c4715 100644
--- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
@@ -267,8 +267,12 @@ entry:
define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: extract_4_v4i32_badindex:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI5_0
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index 1ed9e7cc5254d..de90024a4a257 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -47,7 +47,9 @@ define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: v8i16_2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
entry:
@@ -80,7 +82,9 @@ define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: v16i8_2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
index d315c306aa37a..afcced5dcb9ab 100644
--- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
@@ -137,7 +137,9 @@ define <8 x i16> @shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shuffle_widen_faili1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
entry:
@@ -150,7 +152,9 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shuffle_widen_fail2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
entry:
@@ -163,7 +167,9 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle_widen_fail3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir
index b29ab7727f65d..0e6c94c44712c 100644
--- a/llvm/test/CodeGen/AArch64/seqpairspill.mir
+++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir
@@ -7,11 +7,11 @@ body: |
bb.0:
; Check the spill/reload sequence for the %0 register
; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX
- ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8)
+ ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8)
; CHECK: INLINEASM
- ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8)
+ ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8)
; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
- ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
+ ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
%0 : xseqpairsclass = IMPLICIT_DEF
%1 : xseqpairsclass = IMPLICIT_DEF
%2 : gpr64common = IMPLICIT_DEF
@@ -27,11 +27,11 @@ body: |
bb.0:
; Check the spill/reload sequence for the %0 register
; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW
- ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4)
+ ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4)
; CHECK: INLINEASM
- ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4)
+ ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
- ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
+ ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
%0 : wseqpairsclass = IMPLICIT_DEF
%1 : wseqpairsclass = IMPLICIT_DEF
%2 : gpr64common = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
index 4e49a05506784..fb571eff39fe5 100644
--- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
@@ -21,8 +21,12 @@
define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; CHECK-LABEL: shuffle4_v4i8_16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
%x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -43,8 +47,12 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i
define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; CHECK-LABEL: shuffle4_v4i8_8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
; CHECK-NEXT: ret
%x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -93,10 +101,10 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: shuffle4_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v2.d[1], v3.d[0]
@@ -206,10 +214,10 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
; CHECK-LABEL: shuffle4_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d5, d2
-; CHECK-NEXT: fmov d4, d0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: fmov d4, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: mov v4.d[1], v1.d[0]
; CHECK-NEXT: mov v5.d[1], v3.d[0]
@@ -274,10 +282,10 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
; CHECK-NEXT: mov v0.d[1], v0.d[0]
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: adrp x8, .LCPI6_1
-; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b
-; CHECK-NEXT: tbl v1.8b, { v0.16b }, v1.8b
+; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b
+; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1]
-; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
%x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
%y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
@@ -346,10 +354,10 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
; CHECK-LABEL: shuffle4_v4i8_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d5, d2
-; CHECK-NEXT: fmov d4, d0
-; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: fmov d4, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: mov v4.d[1], v1.d[0]
; CHECK-NEXT: mov v5.d[1], v3.d[0]
@@ -385,8 +393,12 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) {
; CHECK-LABEL: shuffle4_v4i16_trunc:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
%a = trunc <4 x i16> %ae to <4 x i8>
@@ -420,13 +432,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %
define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) {
; CHECK-LABEL: shuffle4_v4i32_trunc:
; CHECK: // %bb.0:
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: xtn v4.4h, v0.4s
; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: xtn v2.4h, v2.4s
-; CHECK-NEXT: xtn v3.4h, v3.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT: xtn v5.4h, v1.4s
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: xtn v6.4h, v2.4s
+; CHECK-NEXT: xtn v7.4h, v3.4s
+; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
; CHECK-NEXT: ret
%a = trunc <4 x i32> %ae to <4 x i8>
%b = trunc <4 x i32> %be to <4 x i8>
@@ -458,8 +470,11 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %
define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
; CHECK-LABEL: shuffle3_v4i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: ret
%x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -489,9 +504,9 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: shuffle3_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d3, d2
-; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: mov v2.d[1], v1.d[0]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
@@ -548,12 +563,12 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8>
; CHECK-LABEL: insert4_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v4.16b, v3.16b
-; CHECK-NEXT: mov v3.16b, v1.16b
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: adrp x9, .LCPI14_1
+; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: mov v3.16b, v1.16b
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1]
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
@@ -617,14 +632,16 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8>
define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: insert4_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v4.16b, v3.16b
; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v3.16b, v1.16b
+; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
-; CHECK-NEXT: tbl v31.16b, { v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b
; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b
; CHECK-NEXT: ret
%e1 = extractelement <8 x i8> %a, i32 4
@@ -688,7 +705,6 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: frintm v1.2d, v1.2d
; CHECK-NEXT: frintm v5.2d, v5.2d
-; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: frintm v2.2d, v2.2d
; CHECK-NEXT: frintm v6.2d, v6.2d
; CHECK-NEXT: frintm v3.2d, v3.2d
@@ -701,16 +717,17 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
; CHECK-NEXT: fcvtzs v6.2d, v6.2d
; CHECK-NEXT: fcvtzs v3.2d, v3.2d
; CHECK-NEXT: fcvtzs v7.2d, v7.2d
-; CHECK-NEXT: xtn v0.2s, v0.2d
-; CHECK-NEXT: xtn v4.2s, v4.2d
-; CHECK-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEXT: xtn v5.2s, v5.2d
-; CHECK-NEXT: xtn v2.2s, v2.2d
-; CHECK-NEXT: xtn v6.2s, v6.2d
-; CHECK-NEXT: xtn v3.2s, v3.2d
-; CHECK-NEXT: xtn v7.2s, v7.2d
-; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-NEXT: xtn v16.2s, v0.2d
+; CHECK-NEXT: xtn v20.2s, v4.2d
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: xtn v17.2s, v1.2d
+; CHECK-NEXT: xtn v21.2s, v5.2d
+; CHECK-NEXT: xtn v18.2s, v2.2d
+; CHECK-NEXT: xtn v22.2s, v6.2d
+; CHECK-NEXT: xtn v19.2s, v3.2d
+; CHECK-NEXT: xtn v23.2s, v7.2d
+; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
index 00057ea3359b7..41dd7f06712d2 100644
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -366,7 +366,9 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
; CHECKLE-LABEL: test_shuf9:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI13_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -376,10 +378,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI13_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
@@ -416,7 +418,9 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf11:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI15_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -426,10 +430,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI15_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
@@ -442,7 +446,9 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf12:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI16_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -452,10 +458,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI16_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
@@ -468,7 +474,9 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf13:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI17_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -478,10 +486,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI17_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
@@ -494,7 +502,9 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf14:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI18_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -504,10 +514,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI18_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
@@ -520,7 +530,9 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf15:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: adrp x8, .LCPI19_0
+; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
+; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT: ret
;
@@ -530,10 +542,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: adrp x8, .LCPI19_0
; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0
-; CHECKBE-NEXT: ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 1f81a2e4bbb82..b1131f287fe9a 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -33,12 +33,23 @@ define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
}
define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: shufflevector_v16i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shufflevector_v16i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI1_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shufflevector_v16i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI1_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
ret <16 x i8> %c
}
@@ -53,12 +64,23 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
}
define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: shufflevector_v8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shufflevector_v8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI3_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shufflevector_v8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI3_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
ret <8 x i16> %c
}
@@ -215,25 +237,26 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v32i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: adrp x9, .LCPI16_1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI16_1]
-; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v32i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
; CHECK-GI-NEXT: adrp x9, .LCPI16_0
+; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
-; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
+; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
ret <32 x i8> %c
@@ -275,25 +298,26 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v16i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI18_0
; CHECK-SD-NEXT: adrp x9, .LCPI18_1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI18_1]
-; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
+; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v16i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI18_1
; CHECK-GI-NEXT: adrp x9, .LCPI18_0
+; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI18_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
-; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
+; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
ret <16 x i16> %c
@@ -320,8 +344,10 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-GI-LABEL: shufflevector_v8i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
@@ -537,12 +563,23 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
}
define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
-; CHECK-LABEL: shufflevector_v7i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI33_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shufflevector_v7i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI33_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shufflevector_v7i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
ret <7 x i16> %c
}
@@ -557,7 +594,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
; CHECK-GI-LABEL: shufflevector_v3i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
index 52a161ba78525..e7a6c0d6c549b 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
@@ -6,7 +6,9 @@ target triple = "aarch64-linux"
define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #0 {
; CHECK-LABEL: add_f16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h }
; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -19,7 +21,11 @@ define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
; CHECK-LABEL: add_f16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h }
; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -35,7 +41,9 @@ define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #1 {
; CHECK-LABEL: sub_f16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h }
; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -48,7 +56,11 @@ define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
; CHECK-LABEL: sub_f16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h }
; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -64,7 +76,9 @@ define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
; CHECK-LABEL: add_bf16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -77,7 +91,11 @@ define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
; CHECK-LABEL: add_bf16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -93,7 +111,9 @@ define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
; CHECK-LABEL: sub_bf16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -106,7 +126,11 @@ define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
define void @sub_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
; CHECK-LABEL: sub_bf16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h }
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
index 402183ab12372..ecaf8bccb71fb 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
@@ -8,7 +8,9 @@
define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: ret
@@ -25,7 +27,9 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: ret
@@ -46,7 +50,11 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: ret
@@ -67,7 +75,11 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: ret
@@ -93,7 +105,11 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: ret
@@ -112,7 +128,11 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: ret
@@ -135,7 +155,15 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: ret
@@ -159,7 +187,15 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: ret
@@ -187,7 +223,9 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -200,7 +238,9 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -213,7 +253,9 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fadd za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT: fadd za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -228,7 +270,9 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
; CHECK-LABEL: multi_vector_add_za_vg1x2_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fadd za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: fadd za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -245,7 +289,11 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT: ret
@@ -262,7 +310,11 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -279,7 +331,11 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fadd za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT: fadd za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT: ret
@@ -296,7 +352,11 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) {
; CHECK-LABEL: multi_vector_add_za_vg1x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fadd za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: fadd za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
index 613fba4a73838..3a73ff7cdc29c 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
@@ -7,6 +7,8 @@
define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: multi_vector_cvtn_x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtn z0.h, { z0.s, z1.s }
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
@@ -20,6 +22,8 @@ define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1,
define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: multi_vector_bfcvtn_x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfcvtn z0.h, { z0.s, z1.s }
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
index 07b10fdc8eeb2..401cdd0b9dfb7 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
@@ -6,7 +6,9 @@
define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: ret
@@ -23,7 +25,9 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: ret
@@ -40,7 +44,11 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_add_single_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: ret
@@ -60,7 +68,11 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_add_single_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: ret
@@ -82,7 +94,9 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: ret
@@ -99,7 +113,9 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: ret
@@ -116,7 +132,11 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_sub_single_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: ret
@@ -136,7 +156,11 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_sub_single_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: ret
@@ -158,7 +182,11 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
; CHECK-LABEL: multi_vector_add_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: ret
@@ -176,7 +204,11 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
define void @multi_vector_add_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
; CHECK-LABEL: multi_vector_add_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: ret
@@ -212,7 +244,15 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, <vscale x 4 x float>
define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_add_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: ret
@@ -230,7 +270,15 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
define void @multi_vector_add_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_add_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: ret
@@ -272,7 +320,11 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, <vscale x 4 x float>
define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
; CHECK-LABEL: multi_vector_sub_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: ret
@@ -290,7 +342,11 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
; CHECK-LABEL: multi_vector_sub_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: ret
@@ -308,7 +364,15 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <v
define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_sub_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: ret
@@ -326,7 +390,15 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_sub_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: ret
@@ -346,7 +418,9 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <v
define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
; CHECK-NEXT: ret
@@ -363,7 +437,9 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_add_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
; CHECK-NEXT: ret
@@ -382,8 +458,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
; CHECK-LABEL: multi_vector_add_lane_vg1x2_s_regclass:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z4.s, z5.s }, z2.s[3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice,
@@ -395,7 +471,11 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_add_lane_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
; CHECK-NEXT: ret
@@ -415,7 +495,11 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_add_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_add_lane_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
; CHECK-NEXT: ret
@@ -437,8 +521,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
; CHECK-LABEL: multi_vector_add_lane_vg1x4_s_regclass:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: mov z27.d, z0.d
; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z24.s - z27.s }, z4.s[3]
@@ -456,7 +540,9 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
; CHECK-NEXT: ret
@@ -473,7 +559,9 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
; CHECK-NEXT: ret
@@ -490,7 +578,11 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn
define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
; CHECK-NEXT: ret
@@ -510,7 +602,11 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
index cd8d22441eaa2..b4fd5a2272e7e 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
@@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @fdot_multi_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
; CHECK-LABEL: fdot_multi_za32_f16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7) #0 {
call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
@@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7) #0 {
call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
@@ -99,7 +99,9 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) #0 {
; CHECK-LABEL: fdot_single_za32_f16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -112,7 +114,11 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) #0 {
; CHECK-LABEL: fdot_single_za32_f16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -128,7 +134,9 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) #0 {
; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -141,7 +149,11 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused
define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) #0 {
; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -158,8 +170,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: fdot_lane_za32_f16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: ret
@@ -173,8 +185,8 @@ define void @fdot_lane_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: fdot_lane_za32_f16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
@@ -195,8 +207,8 @@ define void @bfdot_lane_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: ret
@@ -210,8 +222,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
index f144e33793fe8..99de6f832a3c9 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
@@ -10,7 +10,9 @@
define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT: ret
@@ -23,7 +25,9 @@ define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -36,7 +40,9 @@ define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -49,7 +55,9 @@ define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -62,7 +70,9 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -75,7 +85,9 @@ define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -88,7 +100,9 @@ define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -98,7 +112,9 @@ define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -110,7 +126,9 @@ define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT: ret
@@ -123,7 +141,9 @@ define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -136,7 +156,9 @@ define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -149,7 +171,9 @@ define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT: ret
@@ -162,7 +186,9 @@ define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -175,7 +201,9 @@ define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -188,7 +216,9 @@ define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -198,7 +228,9 @@ define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -214,7 +246,11 @@ define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT: mov za0h.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT: ret
@@ -227,7 +263,11 @@ define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -240,7 +280,11 @@ define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -253,7 +297,11 @@ define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -266,7 +314,11 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -276,7 +328,11 @@ define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -286,7 +342,11 @@ define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -296,7 +356,11 @@ define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -308,7 +372,11 @@ define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT: mov za0v.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT: ret
@@ -321,7 +389,11 @@ define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -334,7 +406,11 @@ define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -347,7 +423,11 @@ define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT: ret
@@ -360,7 +440,11 @@ define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -370,7 +454,11 @@ define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -380,7 +468,11 @@ define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -390,7 +482,11 @@ define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w12, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -404,7 +500,9 @@ define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2) {
; CHECK-LABEL: za_write_vg1x2_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -417,7 +515,9 @@ define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2) {
; CHECK-LABEL: za_write_vg1x2_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -430,7 +530,9 @@ define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2) {
; CHECK-LABEL: za_write_vg1x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -443,7 +545,9 @@ define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2) {
; CHECK-LABEL: za_write_vg1x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -456,7 +560,9 @@ define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2) {
; CHECK-LABEL: za_write_vg1x2_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -469,7 +575,9 @@ define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2) {
; CHECK-LABEL: za_write_vg1x2_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -482,7 +590,9 @@ define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
; CHECK-LABEL: za_write_vg1x2_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -495,7 +605,9 @@ define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
; CHECK-LABEL: za_write_vg1x2_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -512,7 +624,11 @@ define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale
define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4) {
; CHECK-LABEL: za_write_vg1x4_b:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -525,7 +641,11 @@ define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4) {
; CHECK-LABEL: za_write_vg1x4_h:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -538,7 +658,11 @@ define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4) {
; CHECK-LABEL: za_write_vg1x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -551,7 +675,11 @@ define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4) {
; CHECK-LABEL: za_write_vg1x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -564,7 +692,11 @@ define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4) {
; CHECK-LABEL: za_write_vg1x4_s:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -577,7 +709,11 @@ define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4) {
; CHECK-LABEL: za_write_vg1x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -590,7 +726,11 @@ define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
; CHECK-LABEL: za_write_vg1x4_d:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -603,7 +743,11 @@ define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
; CHECK-LABEL: za_write_vg1x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll
index 3ce77cd8e0321..e154a4df86efe 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll
@@ -26,18 +26,18 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_multi_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: udot_multi_za32_u16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zn5, <vscale x 8 x i16> %zn6, <vscale x 8 x i16> %zn7) #0 {
call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
@@ -68,18 +68,18 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
define void @udot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: udot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
<vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zn5, <vscale x 16 x i8> %zn6, <vscale x 16 x i8> %zn7) #0 {
call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
@@ -110,18 +110,18 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_multi_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: udot_multi_za64_u16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zn5, <vscale x 8 x i16> %zn6, <vscale x 8 x i16> %zn7) #1 {
call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
@@ -152,18 +152,18 @@ define void @usdot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @usdot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: usdot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
<vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zn5, <vscale x 16 x i8> %zn6, <vscale x 16 x i8> %zn7) #0 {
call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
@@ -197,18 +197,18 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_multi_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: sdot_multi_za32_u16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zn5, <vscale x 8 x i16> %zn6, <vscale x 8 x i16> %zn7) #0 {
call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
@@ -239,18 +239,18 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
define void @sdot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: sdot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
<vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zn5, <vscale x 16 x i8> %zn6, <vscale x 16 x i8> %zn7) #0 {
call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
@@ -281,18 +281,18 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_multi_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: sdot_multi_za64_u16_vg1x4:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
<vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zn5, <vscale x 8 x i16> %zn6, <vscale x 8 x i16> %zn7) #1 {
call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
@@ -309,7 +309,9 @@ define void @sdot_multi_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_single_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #0 {
; CHECK-LABEL: udot_single_za32_u16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -322,7 +324,11 @@ define void @udot_single_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @udot_single_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) #0 {
; CHECK-LABEL: udot_single_za32_u16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -335,7 +341,9 @@ define void @udot_single_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @udot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) #0 {
; CHECK-LABEL: udot_single_za32_u8_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -348,7 +356,11 @@ define void @udot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) #0 {
; CHECK-LABEL: udot_single_za32_u8_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -361,7 +373,9 @@ define void @udot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_single_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #1 {
; CHECK-LABEL: udot_single_za64_u16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -374,7 +388,11 @@ define void @udot_single_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @udot_single_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) #1 {
; CHECK-LABEL: udot_single_za64_u16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -387,7 +405,9 @@ define void @udot_single_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @usdot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) #0 {
; CHECK-LABEL: usdot_single_za32_u8_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -400,7 +420,11 @@ define void @usdot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @usdot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) #0 {
; CHECK-LABEL: usdot_single_za32_u8_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -416,7 +440,9 @@ define void @usdot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @sdot_single_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #0 {
; CHECK-LABEL: sdot_single_za32_u16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -429,7 +455,11 @@ define void @sdot_single_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @sdot_single_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) #0 {
; CHECK-LABEL: sdot_single_za32_u16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -442,7 +472,9 @@ define void @sdot_single_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @sdot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) #0 {
; CHECK-LABEL: sdot_single_za32_u8_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -455,7 +487,11 @@ define void @sdot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) #0 {
; CHECK-LABEL: sdot_single_za32_u8_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -468,7 +504,9 @@ define void @sdot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_single_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #1 {
; CHECK-LABEL: sdot_single_za64_u16_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -481,7 +519,11 @@ define void @sdot_single_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @sdot_single_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) #1 {
; CHECK-LABEL: sdot_single_za64_u16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -494,7 +536,9 @@ define void @sdot_single_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
define void @sudot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) #0 {
; CHECK-LABEL: sudot_single_za32_u8_vg1x2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -507,7 +551,11 @@ define void @sudot_single_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @sudot_single_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) #0 {
; CHECK-LABEL: sudot_single_za32_u8_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: sudot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -523,8 +571,8 @@ define void @udot_lane_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: udot_lane_za32_u16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: ret
@@ -537,7 +585,11 @@ define void @udot_lane_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
define void @udot_lane_za32_u16_vg1x4(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) #0 {
; CHECK-LABEL: udot_lane_za32_u16_vg1x4:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[3]
; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[3]
; CHECK-NEXT: ret
@@ -553,8 +605,8 @@ define void @udot_lane_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vs
; CHECK-LABEL: udot_lane_za32_u8_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: ret
@@ -568,8 +620,8 @@ define void @udot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vs
; CHECK-LABEL: udot_lane_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z24.b - z27.b }, z5.b[3]
@@ -583,107 +635,12 @@ define void @udot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vs
ret void
}
-define void @udot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: udot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z2.b, z3.b }, z0.b[0]
-; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 0
- %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 1
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %5, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %6, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
-define void @udot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: udot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
define void @udot_lane_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #1 {
; CHECK-LABEL: udot_lane_za64_u16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1]
; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1]
; CHECK-NEXT: ret
@@ -697,8 +654,8 @@ define void @udot_lane_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: udot_lane_za64_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z24.h - z27.h }, z5.h[1]
@@ -716,8 +673,8 @@ define void @usdot_lane_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: usdot_lane_za32_u8_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: ret
@@ -731,8 +688,8 @@ define void @usdot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: usdot_lane_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z24.b - z27.b }, z5.b[3]
@@ -746,100 +703,6 @@ define void @usdot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
ret void
}
-define void @usdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: usdot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z2.b, z3.b }, z0.b[0]
-; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 0
- %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 1
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %5, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %6, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
-define void @usdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: usdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
; == Multi, indexed (signed) ==
@@ -847,8 +710,8 @@ define void @sdot_lane_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: sdot_lane_za32_u16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT: ret
@@ -862,8 +725,8 @@ define void @sdot_lane_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: sdot_lane_za32_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
@@ -881,8 +744,8 @@ define void @sdot_lane_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vs
; CHECK-LABEL: sdot_lane_za32_u8_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: ret
@@ -896,8 +759,8 @@ define void @sdot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vs
; CHECK-LABEL: sdot_lane_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z24.b - z27.b }, z5.b[3]
@@ -911,107 +774,12 @@ define void @sdot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vs
ret void
}
-define void @sdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: sdot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z2.b, z3.b }, z0.b[0]
-; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 0
- %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 1
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %5, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %6, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
-define void @sdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: sdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
define void @sdot_lane_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) #1 {
; CHECK-LABEL: sdot_lane_za64_u16_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1]
; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1]
; CHECK-NEXT: ret
@@ -1025,8 +793,8 @@ define void @sdot_lane_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: sdot_lane_za64_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z24.h - z27.h }, z5.h[1]
@@ -1046,8 +814,8 @@ define void @sudot_lane_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: sudot_lane_za32_u8_vg1x2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3]
; CHECK-NEXT: ret
@@ -1061,8 +829,8 @@ define void @sudot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
; CHECK-LABEL: sudot_lane_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z24.b - z27.b }, z5.b[3]
@@ -1076,103 +844,9 @@ define void @sudot_lane_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
ret void
}
-define void @sudot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: sudot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z2.b, z3.b }, z0.b[0]
-; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 0
- %6 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %4, 1
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %5, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %6, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
-define void @sudot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: sudot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" }
-attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" }
+attributes #0 = { nounwind "target-features"="+sme2" }
+attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" }
; == Multi, multi (unsigned)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
index 3616e074d408e..79db677853cb5 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
@@ -114,6 +114,8 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_max_single_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -314,6 +316,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_max_single_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -505,6 +511,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_max_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -563,20 +573,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -588,20 +598,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -613,20 +623,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -638,20 +648,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_s64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -665,20 +675,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -690,20 +700,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -715,20 +725,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -740,20 +750,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_u64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -767,6 +777,14 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_max_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -778,20 +796,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -803,20 +821,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -828,20 +846,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
; CHECK-LABEL: multi_vec_max_multi_x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -855,6 +873,8 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -907,6 +927,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_single
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -974,6 +998,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1032,6 +1060,14 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1043,20 +1079,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1067,20 +1103,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1091,20 +1127,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_maxnm_x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
index 58a0989f25d82..e5c36d42fb135 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
@@ -114,6 +114,8 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -314,6 +316,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -505,6 +511,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_min_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -563,20 +573,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -588,20 +598,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -613,20 +623,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_s32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -638,20 +648,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_s64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -665,20 +675,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -690,20 +700,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -715,20 +725,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_u32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -740,20 +750,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_u64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -768,6 +778,14 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_min_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -779,20 +797,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -804,20 +822,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -829,20 +847,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
; CHECK-LABEL: multi_vec_min_multi_x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -856,6 +874,8 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -908,6 +928,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -975,6 +999,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1033,6 +1061,14 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1044,20 +1080,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1068,20 +1104,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1092,20 +1128,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
index e5e3da05edced..346afc611eb75 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -38,7 +38,9 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, <vscale x 8
define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -51,7 +53,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16
define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -66,7 +70,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8
define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -79,7 +87,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16
define void @multi_vector_mul_add_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -130,18 +142,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
%slice.4 = add i32 %slice, 4
@@ -152,18 +164,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
%slice.4 = add i32 %slice, 4
@@ -205,8 +217,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -220,8 +232,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
; CHECK-NEXT: ret
@@ -237,8 +249,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -254,8 +266,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -302,7 +314,9 @@ define void @multi_vector_mul_add_single_long_vg4x1_u16(i32 %slice, <vscale x 8
define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -315,7 +329,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16
define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -330,7 +346,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8
define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -343,7 +363,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16
define void @multi_vector_mul_add_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -394,18 +418,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
%slice.4 = add i32 %slice, 4
@@ -416,18 +440,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
%slice.4 = add i32 %slice, 4
@@ -469,8 +493,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -484,8 +508,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
; CHECK-NEXT: ret
@@ -501,8 +525,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -518,8 +542,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -566,7 +590,9 @@ define void @multi_vector_mul_sub_single_long_vg4x1_s16(i32 %slice, <vscale x 8
define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -579,7 +605,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16
define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -594,7 +622,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8
define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -607,7 +639,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16
define void @multi_vector_mul_sub_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -658,18 +694,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
%slice.4 = add i32 %slice, 4
@@ -680,18 +716,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
%slice.4 = add i32 %slice, 4
@@ -733,8 +769,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -748,8 +784,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
; CHECK-NEXT: ret
@@ -765,8 +801,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -782,8 +818,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -830,7 +866,9 @@ define void @multi_vector_mul_sub_single_long_vg4x1_u16(i32 %slice, <vscale x 8
define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -843,7 +881,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16
define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
@@ -858,7 +898,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8
define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -871,7 +915,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16
define void @multi_vector_mul_sub_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT: ret
@@ -922,18 +970,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
%slice.4 = add i32 %slice, 4
@@ -944,18 +992,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
%slice.4 = add i32 %slice, 4
@@ -997,8 +1045,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -1012,8 +1060,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
; CHECK-NEXT: ret
@@ -1029,8 +1077,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1046,8 +1094,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -1068,7 +1116,9 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -1083,7 +1133,11 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscal
define void @multi_vector_mul_add_single_signed_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -1114,8 +1168,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, <vscale
; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -1131,8 +1185,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x4_s8(i32 %slice, <vscale
; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1166,7 +1220,9 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x1_s8(i32 %slice, <vsc
define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
@@ -1181,7 +1237,11 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vsc
define void @multi_vector_mul_add_single_unsigned_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
; CHECK-NEXT: ret
@@ -1215,18 +1275,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, <vsca
define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
%slice.4 = add i32 %slice, 4
@@ -1255,8 +1315,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, <vscal
; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
; CHECK-NEXT: ret
@@ -1272,8 +1332,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x4_s8(i32 %slice, <vscal
; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
index 1548c1612b896..ba10c2dd3cf48 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
@@ -120,7 +120,9 @@ define void @multi_vector_sub_single_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -133,7 +135,9 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -146,7 +150,9 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -159,7 +165,9 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -176,7 +184,9 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -189,7 +199,9 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -202,7 +214,9 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -215,7 +229,9 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
; CHECK-NEXT: ret
@@ -232,7 +248,11 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -249,8 +269,11 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x4_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: mov z3.d, z2.d
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -267,7 +290,11 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -284,7 +311,11 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_single_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -305,7 +336,11 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -322,7 +357,11 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -339,7 +378,11 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -356,7 +399,11 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
; CHECK-NEXT: ret
@@ -377,7 +424,11 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0, <vscale x 8 x bfloat> %zm1) {
; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -392,7 +443,11 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -407,7 +462,11 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -422,7 +481,11 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -441,7 +504,11 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0, <vscale x 8 x bfloat> %zm1) {
; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -456,7 +523,11 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -471,7 +542,11 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -486,7 +561,11 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
; CHECK-NEXT: ret
@@ -505,7 +584,15 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -523,7 +610,15 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -541,7 +636,15 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -559,7 +662,15 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -581,7 +692,15 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -599,7 +718,15 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -617,7 +744,15 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -635,7 +770,15 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
@@ -769,7 +912,9 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %zn,
define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -784,7 +929,9 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -799,7 +946,9 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -814,7 +963,9 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -833,7 +984,9 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -848,7 +1001,9 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -863,7 +1018,9 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -878,7 +1035,9 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
; CHECK-NEXT: ret
@@ -897,7 +1056,11 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -914,7 +1077,11 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -931,7 +1098,11 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -948,7 +1119,11 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -969,7 +1144,11 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -986,7 +1165,11 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -1003,7 +1186,11 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
@@ -1020,7 +1207,11 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
index b95a774e899c8..12a940ff03e29 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
@@ -324,20 +324,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -348,20 +348,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -372,20 +372,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -396,20 +396,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
@@ -484,20 +484,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -508,20 +508,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -532,20 +532,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -556,20 +556,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
index 07a5f7993a1cb..f41791e626f5f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -8,6 +8,7 @@ define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1
; CHECK-NEXT: ret
entry:
%read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
@@ -20,6 +21,7 @@ define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2)
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add w8, w0, w1
; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1
; CHECK-NEXT: ret
entry:
%add = add i32 %idx1, %idx2
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
index 68ae92bc68f4b..e71afe213d8a5 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
@@ -196,20 +196,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_mul
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -221,20 +221,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -246,20 +246,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -271,20 +271,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z30.d, z7.d
+; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z26.d, z7.d
-; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
-; CHECK-NEXT: mov z2.d, z6.d
-; CHECK-NEXT: mov z3.d, z7.d
+; CHECK-NEXT: mov z29.d, z6.d
+; CHECK-NEXT: mov z26.d, z3.d
+; CHECK-NEXT: mov z28.d, z5.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT: mov z24.d, z1.d
+; CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
+; CHECK-NEXT: mov z2.d, z26.d
+; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
index 46409a0a80b78..da8c679d5a39a 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
@@ -8,7 +8,9 @@
define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
; CHECK-NEXT: ret
@@ -25,7 +27,9 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
; CHECK-NEXT: ret
@@ -46,7 +50,11 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
; CHECK-NEXT: ret
@@ -67,7 +75,11 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
; CHECK-NEXT: ret
@@ -93,7 +105,11 @@ define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
; CHECK-NEXT: ret
@@ -112,7 +128,11 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
; CHECK-NEXT: ret
@@ -135,7 +155,15 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
; CHECK-NEXT: ret
@@ -159,7 +187,15 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
; CHECK-NEXT: ret
@@ -189,7 +225,9 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -202,7 +240,9 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -215,7 +255,9 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fsub za.s[w8, 0, vgx2], { z0.s, z1.s }
; CHECK-NEXT: fsub za.s[w8, 7, vgx2], { z0.s, z1.s }
; CHECK-NEXT: ret
@@ -230,7 +272,9 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fsub za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT: fsub za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT: ret
@@ -247,7 +291,11 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT: ret
@@ -266,7 +314,11 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
@@ -285,7 +337,11 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fsub za.s[w8, 0, vgx4], { z0.s - z3.s }
; CHECK-NEXT: fsub za.s[w8, 7, vgx4], { z0.s - z3.s }
; CHECK-NEXT: ret
@@ -304,7 +360,11 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
define void @multi_vector_sub_za_vg1x4_f64(i32 %slice,
; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fsub za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT: fsub za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
index f552c9e604bdd..b698b60007eb9 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
@@ -1,14 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+sme-i16i64 -verify-machineinstrs < %s | FileCheck %s
-target triple="aarch64-linux-gnu"
; == FVDOT ==
-define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm) #0 {
+define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm) {
; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: fvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: ret
@@ -21,10 +22,12 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, <vscale x 8 x half>
; == BFVDOT ==
-define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) #0 {
+define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: bfvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: ret
@@ -37,10 +40,12 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloa
; == SVDOT ==
-define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #0 {
+define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: svdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: ret
@@ -50,10 +55,14 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
ret void
}
-define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #0 {
+define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: svdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: ret
@@ -63,10 +72,14 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
ret void
}
-define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
+define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1]
; CHECK-NEXT: svdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1]
; CHECK-NEXT: ret
@@ -76,108 +89,15 @@ define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %
ret void
}
-define void @svdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: svdot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: add x9, x0, x1
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0]
-; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0]
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %1, 0
- %3 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %4, 0
- %6 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %4, 1
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, <vscale x 8 x i16> %2, <vscale x 8 x i16> %5, <vscale x 8 x i16> undef, i32 0)
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, <vscale x 8 x i16> %3, <vscale x 8 x i16> %6, <vscale x 8 x i16> undef, i32 0)
- ret void
-}
-
-define void @svdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: svdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
; == UVDOT ==
-define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #0 {
+define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: uvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3]
; CHECK-NEXT: ret
@@ -187,10 +107,14 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
ret void
}
-define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #0 {
+define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: uvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: ret
@@ -200,10 +124,14 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
ret void
}
-define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
+define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1]
; CHECK-NEXT: uvdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1]
; CHECK-NEXT: ret
@@ -213,108 +141,17 @@ define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %
ret void
}
-define void @uvdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: uvdot_form_2x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: add x9, x0, x1
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0]
-; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9]
-; CHECK-NEXT: mov z2.d, z16.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0]
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %1, 0
- %3 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %1, 1
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %4 = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2)
- %5 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %4, 0
- %6 = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %4, 1
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, <vscale x 8 x i16> %2, <vscale x 8 x i16> %5, <vscale x 8 x i16> undef, i32 0)
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, <vscale x 8 x i16> %3, <vscale x 8 x i16> %6, <vscale x 8 x i16> undef, i32 0)
- ret void
-}
-
-define void @uvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: uvdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
; == SUVDOT ==
-define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #0 {
+define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: suvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: ret
@@ -324,80 +161,17 @@ define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8>
ret void
}
-define void @suvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: suvdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
; == USVDOT ==
-define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #0 {
+define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: usvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3]
; CHECK-NEXT: ret
@@ -407,76 +181,6 @@ define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8>
ret void
}
-define void @usvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
-; CHECK-LABEL: usvdot_form_4x_tuple:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ptrue pn8.b
-; CHECK-NEXT: lsl x9, x1, #1
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0]
-; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1]
-; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: add x9, x9, x1
-; CHECK-NEXT: mov z0.d, z17.d
-; CHECK-NEXT: mov z1.d, z16.d
-; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9]
-; CHECK-NEXT: mov z4.d, z21.d
-; CHECK-NEXT: mov z5.d, z20.d
-; CHECK-NEXT: mov z8.d, z25.d
-; CHECK-NEXT: mov z9.d, z24.d
-; CHECK-NEXT: mov z3.d, z16.d
-; CHECK-NEXT: mov z7.d, z17.d
-; CHECK-NEXT: mov z11.d, z18.d
-; CHECK-NEXT: mov z16.d, z29.d
-; CHECK-NEXT: mov z17.d, z28.d
-; CHECK-NEXT: mov z18.d, z14.d
-; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
-; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0]
-; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0]
-; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
- %1 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr)
- %2 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 0
- %3 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 1
- %4 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 2
- %5 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %1, 3
- %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
- %6 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2)
- %7 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 0
- %8 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 1
- %9 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 2
- %10 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, 3
- %mul3 = shl i64 %stride, 1
- %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
- %11 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4)
- %12 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 0
- %13 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 1
- %14 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 2
- %15 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %11, 3
- %mul5 = mul i64 %stride, 3
- %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
- %16 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6)
- %17 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 0
- %18 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 1
- %19 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 2
- %20 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %16, 3
- tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %2, <vscale x 16 x i8> %7, <vscale x 16 x i8> %12, <vscale x 16 x i8> %17, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %3, <vscale x 16 x i8> %8, <vscale x 16 x i8> %13, <vscale x 16 x i8> %18, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %4, <vscale x 16 x i8> %9, <vscale x 16 x i8> %14, <vscale x 16 x i8> %19, <vscale x 16 x i8> undef, i32 0)
- tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, <vscale x 16 x i8> %5, <vscale x 16 x i8> %10, <vscale x 16 x i8> %15, <vscale x 16 x i8> %20, <vscale x 16 x i8> undef, i32 0)
- ret void
-}
-
-attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" }
-attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" }
; == FVDOT ==
declare void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 6895d1854e87d..fb169491b0c90 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16
; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0]
; CHECK-NEXT: ret
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index 0ecf1b1a98834..8882fc9290386 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -15,6 +15,8 @@
define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_valid_imm:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 2, i64 0
@@ -28,7 +30,9 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: rdvl x8, #3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -42,7 +46,9 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: rdvl x8, #-18
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -18, i64 0
@@ -56,7 +62,9 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: rdvl x8, #16
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 16, i64 0
@@ -70,6 +78,8 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16, i64 0
@@ -83,6 +93,8 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14, i64 0
@@ -100,6 +112,8 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 2, i64 0
@@ -113,6 +127,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, ptr %addr, i64 2, i64 0
@@ -130,6 +146,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 4, i64 0
@@ -143,6 +161,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, ptr %addr, i64 6, i64 0
@@ -160,6 +180,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 8, i64 0
@@ -173,6 +195,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, ptr %addr, i64 10, i64 0
@@ -190,6 +214,9 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_valid_imm:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -204,7 +231,10 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: rdvl x8, #4
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -219,7 +249,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: rdvl x8, #5
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -234,7 +267,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: rdvl x8, #-27
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -27, i64 0
@@ -249,7 +285,10 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: rdvl x8, #24
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 24, i64 0
@@ -264,6 +303,9 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24, i64 0
@@ -278,6 +320,9 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21, i64 0
@@ -296,6 +341,9 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 6, i64 0
@@ -310,6 +358,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, ptr %addr, i64 9, i64 0
@@ -328,6 +379,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 12, i64 0
@@ -342,6 +396,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, ptr %addr, i64 15, i64 0
@@ -360,6 +417,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 18, i64 0
@@ -374,6 +434,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -3, i64 0
@@ -392,6 +455,10 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_valid_imm:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -407,7 +474,11 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: rdvl x8, #5
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -423,7 +494,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: rdvl x8, #6
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 6, i64 0
@@ -439,7 +514,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: rdvl x8, #7
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 7, i64 0
@@ -457,8 +536,12 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov x9, #-576 // =0xfffffffffffffdc0
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
@@ -480,8 +563,12 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #512 // =0x200
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT: ret
; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
@@ -501,6 +588,10 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32, i64 0
@@ -516,6 +607,10 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28, i64 0
@@ -535,6 +630,10 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 8, i64 0
@@ -550,6 +649,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, ptr %addr, i64 12, i64 0
@@ -569,6 +672,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 16, i64 0
@@ -584,6 +691,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, ptr %addr, i64 20, i64 0
@@ -603,6 +714,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 24, i64 0
@@ -618,6 +733,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index d7b7e59548003..d6ee787a23f87 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -9,6 +9,8 @@
define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, ptr %addr, i64 %offset
@@ -26,6 +28,8 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, ptr %addr, i64 %offset
@@ -39,6 +43,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, ptr %addr, i64 %offset
@@ -56,6 +62,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, ptr %addr, i64 %offset
@@ -69,6 +77,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, ptr %addr, i64 %offset
@@ -86,6 +96,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %addr, i64 %offset
@@ -99,6 +111,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, ptr %addr, i64 %offset
@@ -116,6 +130,9 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, ptr %addr, i64 %offset
@@ -134,6 +151,9 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, ptr %addr, i64 %offset
@@ -148,6 +168,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, ptr %addr, i64 %offset
@@ -166,6 +189,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, ptr %addr, i64 %offset
@@ -180,6 +206,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, ptr %addr, i64 %offset
@@ -198,6 +227,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %addr, i64 %offset
@@ -212,6 +244,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, ptr %addr, i64 %offset
@@ -230,6 +265,10 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, ptr %addr, i64 %offset
@@ -249,6 +288,10 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, ptr %addr, i64 %offset
@@ -264,6 +307,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, ptr %addr, i64 %offset
@@ -283,6 +330,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, ptr %addr, i64 %offset
@@ -298,6 +349,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, ptr %addr, i64 %offset
@@ -317,6 +372,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %addr, i64 %offset
@@ -332,6 +391,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, ptr %addr, i64 %offset
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index e03d4379d0ee2..d07fd8785121b 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -9,6 +9,8 @@
define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
@@ -25,6 +27,8 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
@@ -37,6 +41,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
@@ -49,6 +55,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st2h_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -65,6 +73,8 @@ define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
@@ -77,6 +87,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
@@ -93,6 +105,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
@@ -105,6 +119,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
@@ -117,6 +133,8 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_ptr:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -133,6 +151,9 @@ define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -150,6 +171,9 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -163,6 +187,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -176,6 +203,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st3h_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -193,6 +223,9 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -206,6 +239,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -223,6 +259,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -236,6 +275,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -249,6 +291,9 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_ptr:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -266,6 +311,10 @@ define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -284,6 +333,10 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -298,6 +351,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -312,6 +369,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st4h_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -330,6 +391,10 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -344,6 +409,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -362,6 +431,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -376,6 +449,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
@@ -390,6 +467,10 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
define void @st4d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x ptr> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_ptr:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr> %v0,
diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
index f6330c613de84..47758893ce711 100644
--- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
%complex = type { { double, double } }
@@ -11,13 +10,11 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x
define void @foo1(ptr %outval, <vscale x 2 x i1> %pred, ptr %inptr) {
; CHECK-LABEL: foo1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x1]
-; CHECK-NEXT: faddv d0, p0, z0.d
-; CHECK-NEXT: faddv d1, p0, z1.d
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: ret
+; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1]
+; CHECK-NEXT: faddv d2, p0, z0.d
+; CHECK-NEXT: faddv d0, p0, z1.d
+; CHECK-NEXT: mov v2.d[1], v0.d[0]
+; CHECK-NEXT: str q2, [x0]
%imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1
%1 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> %pred, ptr nonnull %inptr)
%2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 4e52258e8b5df..66d544d0acbf5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -21,9 +21,9 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: mov z1.b, z0.b[1]
+; CHECK-NEXT: mov z2.b, z0.b[1]
; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: stp w8, w9, [sp, #8]
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: st1b { z0.s }, p0, [x19]
@@ -198,9 +198,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: mov x8, #4 // =0x4
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
-; CHECK-NEXT: ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3]
+; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: stp q0, q1, [x19]
+; CHECK-NEXT: stp q0, q2, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index dd27097d8bdf7..b66e6d9013573 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -38,6 +38,8 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2
; CHECK-LABEL: interleave_store_without_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
;
@@ -73,12 +75,13 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2)
; CHECK-LABEL: interleave_store_legalization:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: mov x8, #8 // =0x8
-; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z2_z3
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: mov x8, #8 // =0x8
+; CHECK-NEXT: mov z4.d, z0.d
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0]
-; CHECK-NEXT: st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: interleave_store_legalization:
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
index b200eb3f23bf2..9fd1eb616c28c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
@@ -8,8 +8,9 @@
define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: tbl2_b:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.b, { z0.b, z1.b }, z3.b
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.b, { z1.b, z2.b }, z3.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl2.nxv16i8(<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b,
@@ -20,8 +21,9 @@ define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unu
define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: tbl2_h:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl2.nxv8i16(<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b,
@@ -32,8 +34,9 @@ define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unu
define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: tbl2_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl2.nxv4i32(<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b,
@@ -44,8 +47,9 @@ define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unu
define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: tbl2_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl2.nxv2i64(<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b,
@@ -56,8 +60,9 @@ define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unu
define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half> %unused, <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: tbl2_fh:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl2.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
@@ -68,8 +73,9 @@ define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half>
define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: tbl2_bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %a,
<vscale x 8 x bfloat> %b,
@@ -80,8 +86,9 @@ define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x float> %unused, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: tbl2_fs:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl2.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
@@ -92,8 +99,9 @@ define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x floa
define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x double> %unused, <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: tbl2_fd:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d
+; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl2.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
index 5eeca5fec16f1..7934f831a7e62 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
@@ -15,6 +15,8 @@ declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fclamp.nxv8bf16(<vscale x 8 x bf
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d){
; CHECK-LABEL: test_bfclamp_single_x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfclamp { z0.h, z1.h }, z2.h, z3.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d)
@@ -24,6 +26,10 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x4_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f){
; CHECK-LABEL: test_bfclamp_single_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: bfclamp { z0.h - z3.h }, z4.h, z5.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
index 90a4927cfa5e9..df6b34a3280a7 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
@@ -33,6 +33,8 @@ define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale
define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) #1 {
; CHECK-LABEL: test_fclamp_single_x2_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
@@ -42,6 +44,8 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<
define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) #1 {
; CHECK-LABEL: test_fclamp_single_x2_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
@@ -51,6 +55,8 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32
define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) #1 {
; CHECK-LABEL: test_fclamp_single_x2_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
@@ -61,6 +67,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f
define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) #1 {
; CHECK-LABEL: test_fclamp_single_x4_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
@@ -70,6 +80,10 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) #1 {
; CHECK-LABEL: test_fclamp_single_x4_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
@@ -79,6 +93,10 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) #1 {
; CHECK-LABEL: test_fclamp_single_x4_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
index 57e1a1e100db0..8fe0694808c8e 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
@@ -7,6 +7,8 @@
define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -19,6 +21,8 @@ define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -32,6 +36,8 @@ define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -45,6 +51,8 @@ define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -58,6 +66,8 @@ define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -71,6 +81,8 @@ define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -84,6 +96,8 @@ define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -97,6 +111,8 @@ define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -111,6 +127,8 @@ define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
@@ -124,6 +142,8 @@ define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off14:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
@@ -137,6 +157,8 @@ define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
@@ -150,6 +172,8 @@ define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
@@ -163,6 +187,8 @@ define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
@@ -176,6 +202,8 @@ define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
@@ -189,6 +217,8 @@ define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
@@ -202,6 +232,8 @@ define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
@@ -215,6 +247,8 @@ define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
%gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
@@ -232,6 +266,9 @@ define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -246,6 +283,9 @@ define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -260,6 +300,9 @@ define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -274,6 +317,9 @@ define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -288,6 +334,9 @@ define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -302,6 +351,9 @@ define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -316,6 +368,9 @@ define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -330,6 +385,9 @@ define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -344,6 +402,9 @@ define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off24:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
@@ -358,6 +419,9 @@ define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off21:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
@@ -372,6 +436,9 @@ define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
@@ -386,6 +453,9 @@ define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
@@ -400,6 +470,9 @@ define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
@@ -414,6 +487,9 @@ define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale
define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
@@ -428,6 +504,9 @@ define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
@@ -442,6 +521,9 @@ define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
@@ -456,6 +538,9 @@ define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
@@ -473,6 +558,10 @@ define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -488,6 +577,10 @@ define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -503,6 +596,10 @@ define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -518,6 +615,10 @@ define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -533,6 +634,10 @@ define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -548,6 +653,10 @@ define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -563,6 +672,10 @@ define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -578,6 +691,10 @@ define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%1 = getelementptr i128, ptr %addr, i64 %offset
@@ -593,6 +710,10 @@ define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
@@ -608,6 +729,10 @@ define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off28:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
@@ -623,6 +748,10 @@ define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
@@ -638,6 +767,10 @@ define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base1 = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
@@ -653,6 +786,10 @@ define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
@@ -668,6 +805,10 @@ define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
@@ -683,6 +824,10 @@ define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
@@ -698,6 +843,10 @@ define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
@@ -713,6 +862,10 @@ define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_bf16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
%base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index 26316caad2bbc..912d5d853aa8d 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -42,6 +42,8 @@ define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
; CHECK-LABEL: test_sclamp_single_x2_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -51,6 +53,8 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vsc
define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
; CHECK-LABEL: test_sclamp_single_x2_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -60,6 +64,8 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vs
define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
; CHECK-LABEL: test_sclamp_single_x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -69,6 +75,8 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vs
define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
; CHECK-LABEL: test_sclamp_single_x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -78,6 +86,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vs
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
; CHECK-LABEL: test_sclamp_single_x4_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -87,6 +99,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
; CHECK-LABEL: test_sclamp_single_x4_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -96,6 +112,10 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
; CHECK-LABEL: test_sclamp_single_x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -105,6 +125,10 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
; CHECK-LABEL: test_sclamp_single_x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
index d64f06aaef885..3a21eaead5f72 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
@@ -8,18 +8,18 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.b - z3.b }, pn8, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -33,18 +33,18 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -58,18 +58,18 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -83,18 +83,18 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -108,18 +108,18 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -133,18 +133,18 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -158,18 +158,18 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -183,18 +183,18 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov z26.d, z7.d
+; CHECK-NEXT: mov z31.d, z4.d
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z25.d, z6.d
-; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT: mov z6.d, z3.d
-; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
-; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
+; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
index 67bacbaded590..8b54a626b3660 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
@@ -10,9 +10,9 @@ define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -28,9 +28,9 @@ define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -46,9 +46,9 @@ define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -64,9 +64,9 @@ define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -82,9 +82,9 @@ define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -100,9 +100,9 @@ define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -118,9 +118,9 @@ define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -136,9 +136,9 @@ define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -154,11 +154,11 @@ define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -174,11 +174,11 @@ define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -194,11 +194,11 @@ define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -214,11 +214,11 @@ define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -234,11 +234,11 @@ define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -254,11 +254,11 @@ define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -274,11 +274,11 @@ define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -294,11 +294,11 @@ define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -316,9 +316,9 @@ define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -334,9 +334,9 @@ define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -352,9 +352,9 @@ define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -370,9 +370,9 @@ define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -388,9 +388,9 @@ define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -406,9 +406,9 @@ define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -424,9 +424,9 @@ define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -442,9 +442,9 @@ define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -460,11 +460,11 @@ define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -480,11 +480,11 @@ define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -500,11 +500,11 @@ define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -520,11 +520,11 @@ define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -540,11 +540,11 @@ define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -560,11 +560,11 @@ define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -580,11 +580,11 @@ define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -600,11 +600,11 @@ define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z7.d, z4.d
-; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
-; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index ca0bad16fe0e9..de1695162c98e 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -42,6 +42,8 @@ define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
; CHECK-LABEL: test_uclamp_single_x2_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -51,6 +53,8 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vsc
define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
; CHECK-LABEL: test_uclamp_single_x2_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -60,6 +64,8 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vs
define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
; CHECK-LABEL: test_uclamp_single_x2_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -69,6 +75,8 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vs
define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
; CHECK-LABEL: test_uclamp_single_x2_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -78,6 +86,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vs
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
; CHECK-LABEL: test_uclamp_single_x4_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b
; CHECK-NEXT: ret
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -87,6 +99,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
; CHECK-LABEL: test_uclamp_single_x4_i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h
; CHECK-NEXT: ret
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -96,6 +112,10 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
; CHECK-LABEL: test_uclamp_single_x4_i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s
; CHECK-NEXT: ret
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -105,6 +125,10 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
; CHECK-LABEL: test_uclamp_single_x4_i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d
; CHECK-NEXT: ret
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
index 741afc3a49a69..fe3ddbf747ace 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
@@ -97,11 +97,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @uzp_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
; CHECK-LABEL: uzp_x4_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z7.d, z5.d
-; CHECK-NEXT: mov z6.d, z4.d
-; CHECK-NEXT: mov z5.d, z3.d
-; CHECK-NEXT: mov z4.d, z2.d
-; CHECK-NEXT: uzp { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT: mov z27.d, z5.d
+; CHECK-NEXT: mov z26.d, z4.d
+; CHECK-NEXT: mov z25.d, z3.d
+; CHECK-NEXT: mov z24.d, z2.d
+; CHECK-NEXT: uzp { z0.d - z3.d }, { z24.d - z27.d }
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzp.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
@@ -204,11 +204,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @zipq_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
; CHECK-LABEL: zipq_x4_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z7.d, z5.d
-; CHECK-NEXT: mov z6.d, z4.d
-; CHECK-NEXT: mov z5.d, z3.d
-; CHECK-NEXT: mov z4.d, z2.d
-; CHECK-NEXT: uzp { z0.q - z3.q }, { z4.q - z7.q }
+; CHECK-NEXT: mov z27.d, z5.d
+; CHECK-NEXT: mov z26.d, z4.d
+; CHECK-NEXT: mov z25.d, z3.d
+; CHECK-NEXT: mov z24.d, z2.d
+; CHECK-NEXT: uzp { z0.q - z3.q }, { z24.q - z27.q }
; CHECK-NEXT: ret
%res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzpq.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
index 638849605a2cb..ab70f57b48874 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
@@ -7,6 +7,7 @@ define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilege_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -17,6 +18,7 @@ define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilege_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -27,6 +29,7 @@ define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilege_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -37,6 +40,7 @@ define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilege_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -50,6 +54,7 @@ define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilegt_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -60,6 +65,7 @@ define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilegt_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -70,6 +76,7 @@ define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilegt_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -80,6 +87,7 @@ define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilegt_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -93,6 +101,7 @@ define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehi_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -103,6 +112,7 @@ define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehi_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -113,6 +123,7 @@ define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehi_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -123,6 +134,7 @@ define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehi_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -136,6 +148,7 @@ define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehs_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -146,6 +159,7 @@ define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehs_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -156,6 +170,7 @@ define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehs_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -166,6 +181,7 @@ define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilehs_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -179,6 +195,7 @@ define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilele_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -189,6 +206,7 @@ define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilele_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -199,6 +217,7 @@ define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilele_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -209,6 +228,7 @@ define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilele_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -222,6 +242,7 @@ define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelo_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -232,6 +253,7 @@ define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelo_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -242,6 +264,7 @@ define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelo_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -252,6 +275,7 @@ define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelo_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -265,6 +289,7 @@ define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilels_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -275,6 +300,7 @@ define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilels_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -285,6 +311,7 @@ define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilels_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -295,6 +322,7 @@ define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilels_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -308,6 +336,7 @@ define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelt_x2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -318,6 +347,7 @@ define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelt_x2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -328,6 +358,7 @@ define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelt_x2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -338,6 +369,7 @@ define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
; CHECK-LABEL: whilelt_x2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
; CHECK-NEXT: ret
%pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
%res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
diff --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
index 77415381709d1..d3abc27a53dad 100644
--- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
+++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
@@ -9,7 +9,6 @@ define void @"func"(ptr swifterror %0) #0 {
; CHECK-NEXT: b {{\.?}}LBB0_2
; CHECK-NEXT: {{\.?}}LBB0_1:{{.*}}%thirtythree
; CHECK-NEXT: {{.*}}=>This Inner Loop Header: Depth=1
-; CHECK-NEXT: {{.*}}implicit-def: $x0
; CHECK-NEXT: b {{\.?}}LBB0_1
; CHECK-NEXT: {{\.?}}LBB0_2:{{.*}}%thirtyeight
; CHECK-NEXT: b {{\.?}}LBB0_3
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index dd5ce449bb1d2..0ad9900865518 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -203,17 +203,16 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s
; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s
; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0
-; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b
-; CHECK-NEXT: mov v3.16b, v4.16b
-; CHECK-NEXT: bsl v3.16b, v0.16b, v2.16b
-; CHECK-NEXT: fcmlt v2.4s, v2.4s, #0.0
-; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: bic v2.16b, v3.16b, v2.16b
-; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b
+; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0
+; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b
+; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b
; CHECK-NEXT: fcvtzs v2.4s, v2.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
; CHECK-NEXT: xtn v2.4h, v2.4s
-; CHECK-NEXT: trn1 v1.8b, v1.8b, v2.8b
+; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: trn1 v1.8b, v2.8b, v1.8b
; CHECK-NEXT: str d1, [x0], #8
; CHECK-NEXT: b.ne .LBB1_9
; CHECK-NEXT: // %bb.10: // %middle.block
@@ -353,22 +352,21 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s
; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s
; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0
-; CHECK-NEXT: bit v2.16b, v0.16b, v5.16b
-; CHECK-NEXT: fcmlt v5.4s, v3.4s, #0.0
-; CHECK-NEXT: bit v3.16b, v0.16b, v6.16b
-; CHECK-NEXT: mov v6.16b, v7.16b
-; CHECK-NEXT: bsl v6.16b, v0.16b, v4.16b
-; CHECK-NEXT: fcmlt v4.4s, v4.4s, #0.0
-; CHECK-NEXT: bic v2.16b, v2.16b, v16.16b
-; CHECK-NEXT: bic v3.16b, v3.16b, v5.16b
-; CHECK-NEXT: fcvtzs v2.4s, v2.4s
-; CHECK-NEXT: bic v4.16b, v6.16b, v4.16b
+; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0
+; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b
+; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b
+; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b
+; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0
+; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b
+; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b
+; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b
; CHECK-NEXT: fcvtzs v3.4s, v3.4s
; CHECK-NEXT: fcvtzs v4.4s, v4.4s
-; CHECK-NEXT: xtn v2.4h, v2.4s
-; CHECK-NEXT: xtn v3.4h, v3.4s
-; CHECK-NEXT: xtn v4.4h, v4.4s
-; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: xtn v5.4h, v3.4s
+; CHECK-NEXT: xtn v6.4h, v4.4s
+; CHECK-NEXT: xtn v7.4h, v2.4s
+; CHECK-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b
; CHECK-NEXT: st1 { v2.s }[2], [x13]
; CHECK-NEXT: str d2, [x0], #12
; CHECK-NEXT: b.ne .LBB2_4
@@ -607,27 +605,26 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s
; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s
; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0
-; CHECK-NEXT: bit v2.16b, v0.16b, v6.16b
-; CHECK-NEXT: fcmlt v6.4s, v3.4s, #0.0
-; CHECK-NEXT: bit v3.16b, v0.16b, v7.16b
-; CHECK-NEXT: fcmlt v7.4s, v4.4s, #0.0
-; CHECK-NEXT: bit v4.16b, v0.16b, v16.16b
-; CHECK-NEXT: mov v16.16b, v17.16b
-; CHECK-NEXT: bsl v16.16b, v0.16b, v5.16b
-; CHECK-NEXT: fcmlt v5.4s, v5.4s, #0.0
-; CHECK-NEXT: bic v2.16b, v2.16b, v18.16b
-; CHECK-NEXT: bic v3.16b, v3.16b, v6.16b
-; CHECK-NEXT: bic v4.16b, v4.16b, v7.16b
-; CHECK-NEXT: fcvtzs v2.4s, v2.4s
-; CHECK-NEXT: bic v5.16b, v16.16b, v5.16b
+; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0
+; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0
+; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b
+; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b
+; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b
+; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b
+; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0
+; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b
+; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b
+; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b
+; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b
; CHECK-NEXT: fcvtzs v3.4s, v3.4s
; CHECK-NEXT: fcvtzs v4.4s, v4.4s
; CHECK-NEXT: fcvtzs v5.4s, v5.4s
-; CHECK-NEXT: xtn v2.4h, v2.4s
-; CHECK-NEXT: xtn v3.4h, v3.4s
-; CHECK-NEXT: xtn v4.4h, v4.4s
-; CHECK-NEXT: xtn v5.4h, v5.4s
-; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: xtn v16.4h, v3.4s
+; CHECK-NEXT: xtn v17.4h, v4.4s
+; CHECK-NEXT: xtn v18.4h, v5.4s
+; CHECK-NEXT: xtn v19.4h, v2.4s
+; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
; CHECK-NEXT: str q2, [x0], #16
; CHECK-NEXT: b.ne .LBB3_9
; CHECK-NEXT: // %bb.10: // %middle.block
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index 575a4b2e6e0fb..c4a58ba12dc6b 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: add x11, x9, #32
-; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
+; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
@@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
; CHECK-BE-NEXT: add x13, x9, #64
-; CHECK-BE-NEXT: add x10, x9, #112
-; CHECK-BE-NEXT: add x11, x9, #96
; CHECK-BE-NEXT: add x12, x9, #80
; CHECK-BE-NEXT: add x14, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: ld1 { v16.16b }, [x13]
+; CHECK-BE-NEXT: add x11, x9, #96
; CHECK-BE-NEXT: add x13, x9, #32
-; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v2.16b }, [x14]
; CHECK-BE-NEXT: ld1 { v17.16b }, [x12]
+; CHECK-BE-NEXT: add x10, x9, #112
+; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x13]
; CHECK-BE-NEXT: ld1 { v18.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
+; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
@@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: add x11, x9, #32
-; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
+; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
index e453d61832522..3685e9cf85bd6 100644
--- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
@@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: str q0, [x1, x8]
+; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s
+; CHECK-NEXT: str q2, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
; CHECK-NEXT: b.ne .LBB0_1
@@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s
-; CHECK-NEXT: str q0, [x1, x8]
+; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s
+; CHECK-NEXT: str q3, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
; CHECK-NEXT: b.ne .LBB1_1
@@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK-NEXT: add x9, x1, x8
; CHECK-NEXT: add x8, x8, #32
; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s
-; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s
-; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9]
+; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s
+; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s
+; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9]
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2,
; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9]
; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10]
-; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v3.4s
-; CHECK-NEXT: str q0, [x2], #16
+; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s
+; CHECK-NEXT: str q4, [x2], #16
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK-NEXT: .LBB4_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: str q0, [x1, x8]
+; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s
+; CHECK-NEXT: str q2, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
; CHECK-NEXT: b.ne .LBB4_1
@@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK-NEXT: .LBB5_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s
-; CHECK-NEXT: str q0, [x1, x8]
+; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s
+; CHECK-NEXT: str q3, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
; CHECK-NEXT: b.ne .LBB5_1
@@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK-NEXT: add x9, x1, x8
; CHECK-NEXT: add x8, x8, #32
; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s
-; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s
-; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9]
+; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s
+; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s
+; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s
+; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9]
; CHECK-NEXT: b.ne .LBB6_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) {
; CHECK: .Lfunc_begin8:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x0, #64]
-; CHECK-NEXT: ldp q4, q5, [x0, #32]
-; CHECK-NEXT: ldp q6, q7, [x0, #96]
-; CHECK-NEXT: mov v0.h[5], v1.h[4]
-; CHECK-NEXT: zip1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: zip1 v2.8h, v4.8h, v5.8h
-; CHECK-NEXT: mov v6.h[5], v7.h[4]
-; CHECK-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-NEXT: uzp1 v1.4s, v2.4s, v6.4s
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q3, q4, [x0, #64]
+; CHECK-NEXT: ldp q5, q6, [x0, #32]
+; CHECK-NEXT: ldp q7, q16, [x0, #96]
+; CHECK-NEXT: mov v0.h[5], v2.h[4]
+; CHECK-NEXT: zip1 v2.8h, v3.8h, v4.8h
+; CHECK-NEXT: zip1 v3.8h, v5.8h, v6.8h
+; CHECK-NEXT: mov v7.h[5], v16.h[4]
+; CHECK-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-NEXT: uzp1 v1.4s, v3.4s, v7.4s
; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s
; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0]
; CHECK-NEXT: str q2, [x0, #64]
@@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: ldr q2, [x2]
; CHECK-NEXT: ldr q3, [x4]
; CHECK-NEXT: ldr q4, [x5]
+; CHECK-NEXT: ldr q2, [x2]
; CHECK-NEXT: ldr q5, [x3]
; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h
; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ldr q6, [x6]
; CHECK-NEXT: ldr q7, [x7]
; CHECK-NEXT: trn1 v17.8h, v3.8h, v4.8h
-; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h
; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h
+; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h
; CHECK-NEXT: trn2 v2.8h, v2.8h, v5.8h
; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h
; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h
; CHECK-NEXT: trn1 v4.4s, v16.4s, v17.4s
-; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s
; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s
+; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s
; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: trn1 v5.4s, v18.4s, v19.4s
; CHECK-NEXT: trn1 v7.4s, v2.4s, v3.4s
@@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ext v3.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT: ext v4.16b, v1.16b, v2.16b, #12
+; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12
; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s
-; CHECK-NEXT: zip2 v4.4s, v1.4s, v4.4s
; CHECK-NEXT: mov v3.s[0], v0.s[0]
; CHECK-NEXT: ext v0.16b, v2.16b, v0.16b, #12
+; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s
; CHECK-NEXT: mov v4.s[0], v1.s[0]
; CHECK-NEXT: zip2 v5.4s, v2.4s, v0.4s
; CHECK-NEXT: mov v5.s[0], v2.s[0]
More information about the llvm-branch-commits
mailing list