[llvm] d338e53 - [AArch64] Regenerate some test checks. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 12 04:13:40 PDT 2021


Author: David Green
Date: 2021-09-12T12:13:29+01:00
New Revision: d338e535ec5f1de8b1b6cf7ea74514dfe1ecd0ce

URL: https://github.com/llvm/llvm-project/commit/d338e535ec5f1de8b1b6cf7ea74514dfe1ecd0ce
DIFF: https://github.com/llvm/llvm-project/commit/d338e535ec5f1de8b1b6cf7ea74514dfe1ecd0ce.diff

LOG: [AArch64] Regenerate some test checks. NFC

This regenerates some of the tests that already had check lines very close to
the autogenerated form, in order to make them more maintainable.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
    llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
    llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
    llvm/test/CodeGen/AArch64/arm64-ldp.ll
    llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
    llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
    llvm/test/CodeGen/AArch64/arm64_32-addrs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 018a1143fc32d..477255bb57a71 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -1,10 +1,12 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s
-; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK64
+; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK32
 
 define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) {
 ; CHECK-LABEL: store64:
-; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x2, [x0], #8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 1
   store i64 %spacing, i64* %ptr, align 4
   ret i64* %incdec.ptr
@@ -12,9 +14,11 @@ define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) {
 
 define i64* @store64idxpos256(i64* %ptr, i64 %index, i64 %spacing) {
 ; CHECK-LABEL: store64idxpos256:
-; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256
-; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    str x2, [x0]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32
   store i64 %spacing, i64* %ptr, align 4
   ret i64* %incdec.ptr
@@ -22,8 +26,9 @@ define i64* @store64idxpos256(i64* %ptr, i64 %index, i64 %spacing) {
 
 define i64* @store64idxneg256(i64* %ptr, i64 %index, i64 %spacing) {
 ; CHECK-LABEL: store64idxneg256:
-; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #-256
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x2, [x0], #-256
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32
   store i64 %spacing, i64* %ptr, align 4
   ret i64* %incdec.ptr
@@ -31,8 +36,9 @@ define i64* @store64idxneg256(i64* %ptr, i64 %index, i64 %spacing) {
 
 define i32* @store32(i32* %ptr, i32 %index, i32 %spacing) {
 ; CHECK-LABEL: store32:
-; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w2, [x0], #4
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
   store i32 %spacing, i32* %ptr, align 4
   ret i32* %incdec.ptr
@@ -40,9 +46,11 @@ define i32* @store32(i32* %ptr, i32 %index, i32 %spacing) {
 
 define i32* @store32idxpos256(i32* %ptr, i32 %index, i32 %spacing) {
 ; CHECK-LABEL: store32idxpos256:
-; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256
-; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    str w2, [x0]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64
   store i32 %spacing, i32* %ptr, align 4
   ret i32* %incdec.ptr
@@ -50,8 +58,9 @@ define i32* @store32idxpos256(i32* %ptr, i32 %index, i32 %spacing) {
 
 define i32* @store32idxneg256(i32* %ptr, i32 %index, i32 %spacing) {
 ; CHECK-LABEL: store32idxneg256:
-; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #-256
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w2, [x0], #-256
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64
   store i32 %spacing, i32* %ptr, align 4
   ret i32* %incdec.ptr
@@ -59,8 +68,9 @@ define i32* @store32idxneg256(i32* %ptr, i32 %index, i32 %spacing) {
 
 define i16* @store16(i16* %ptr, i16 %index, i16 %spacing) {
 ; CHECK-LABEL: store16:
-; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w2, [x0], #2
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1
   store i16 %spacing, i16* %ptr, align 4
   ret i16* %incdec.ptr
@@ -68,9 +78,11 @@ define i16* @store16(i16* %ptr, i16 %index, i16 %spacing) {
 
 define i16* @store16idxpos256(i16* %ptr, i16 %index, i16 %spacing) {
 ; CHECK-LABEL: store16idxpos256:
-; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256
-; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    strh w2, [x0]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128
   store i16 %spacing, i16* %ptr, align 4
   ret i16* %incdec.ptr
@@ -78,8 +90,9 @@ define i16* @store16idxpos256(i16* %ptr, i16 %index, i16 %spacing) {
 
 define i16* @store16idxneg256(i16* %ptr, i16 %index, i16 %spacing) {
 ; CHECK-LABEL: store16idxneg256:
-; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #-256
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w2, [x0], #-256
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128
   store i16 %spacing, i16* %ptr, align 4
   ret i16* %incdec.ptr
@@ -87,8 +100,9 @@ define i16* @store16idxneg256(i16* %ptr, i16 %index, i16 %spacing) {
 
 define i8* @store8(i8* %ptr, i8 %index, i8 %spacing) {
 ; CHECK-LABEL: store8:
-; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w2, [x0], #1
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1
   store i8 %spacing, i8* %ptr, align 4
   ret i8* %incdec.ptr
@@ -96,9 +110,11 @@ define i8* @store8(i8* %ptr, i8 %index, i8 %spacing) {
 
 define i8* @store8idxpos256(i8* %ptr, i8 %index, i8 %spacing) {
 ; CHECK-LABEL: store8idxpos256:
-; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256
-; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    strb w2, [x0]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256
   store i8 %spacing, i8* %ptr, align 4
   ret i8* %incdec.ptr
@@ -106,8 +122,9 @@ define i8* @store8idxpos256(i8* %ptr, i8 %index, i8 %spacing) {
 
 define i8* @store8idxneg256(i8* %ptr, i8 %index, i8 %spacing) {
 ; CHECK-LABEL: store8idxneg256:
-; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #-256
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w2, [x0], #-256
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256
   store i8 %spacing, i8* %ptr, align 4
   ret i8* %incdec.ptr
@@ -115,8 +132,9 @@ define i8* @store8idxneg256(i8* %ptr, i8 %index, i8 %spacing) {
 
 define i32* @truncst64to32(i32* %ptr, i32 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to32:
-; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w2, [x0], #4
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
   %trunc = trunc i64 %spacing to i32
   store i32 %trunc, i32* %ptr, align 4
@@ -125,8 +143,9 @@ define i32* @truncst64to32(i32* %ptr, i32 %index, i64 %spacing) {
 
 define i16* @truncst64to16(i16* %ptr, i16 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to16:
-; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w2, [x0], #2
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1
   %trunc = trunc i64 %spacing to i16
   store i16 %trunc, i16* %ptr, align 4
@@ -135,8 +154,9 @@ define i16* @truncst64to16(i16* %ptr, i16 %index, i64 %spacing) {
 
 define i8* @truncst64to8(i8* %ptr, i8 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to8:
-; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w2, [x0], #1
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1
   %trunc = trunc i64 %spacing to i8
   store i8 %trunc, i8* %ptr, align 4
@@ -146,8 +166,9 @@ define i8* @truncst64to8(i8* %ptr, i8 %index, i64 %spacing) {
 
 define half* @storef16(half* %ptr, half %index, half %spacing) nounwind {
 ; CHECK-LABEL: storef16:
-; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str h1, [x0], #2
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 1
   store half %spacing, half* %ptr, align 2
   ret half* %incdec.ptr
@@ -155,8 +176,9 @@ define half* @storef16(half* %ptr, half %index, half %spacing) nounwind {
 
 define float* @storef32(float* %ptr, float %index, float %spacing) {
 ; CHECK-LABEL: storef32:
-; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str s1, [x0], #4
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 1
   store float %spacing, float* %ptr, align 4
   ret float* %incdec.ptr
@@ -164,8 +186,9 @@ define float* @storef32(float* %ptr, float %index, float %spacing) {
 
 define double* @storef64(double* %ptr, double %index, double %spacing) {
 ; CHECK-LABEL: storef64:
-; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str d1, [x0], #8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 1
   store double %spacing, double* %ptr, align 4
   ret double* %incdec.ptr
@@ -174,8 +197,9 @@ define double* @storef64(double* %ptr, double %index, double %spacing) {
 
 define double* @pref64(double* %ptr, double %spacing) {
 ; CHECK-LABEL: pref64:
-; CHECK:      str d0, [x0, #32]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str d0, [x0, #32]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 4
   store double %spacing, double* %incdec.ptr, align 4
   ret double *%incdec.ptr
@@ -183,8 +207,9 @@ define double* @pref64(double* %ptr, double %spacing) {
 
 define float* @pref32(float* %ptr, float %spacing) {
 ; CHECK-LABEL: pref32:
-; CHECK:      str s0, [x0, #12]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str s0, [x0, #12]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 3
   store float %spacing, float* %incdec.ptr, align 4
   ret float *%incdec.ptr
@@ -192,8 +217,9 @@ define float* @pref32(float* %ptr, float %spacing) {
 
 define half* @pref16(half* %ptr, half %spacing) nounwind {
 ; CHECK-LABEL: pref16:
-; CHECK:      str h0, [x0, #6]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str h0, [x0, #6]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 3
   store half %spacing, half* %incdec.ptr, align 2
   ret half *%incdec.ptr
@@ -201,8 +227,9 @@ define half* @pref16(half* %ptr, half %spacing) nounwind {
 
 define i64* @pre64(i64* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pre64:
-; CHECK:      str x1, [x0, #16]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x1, [x0, #16]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 2
   store i64 %spacing, i64* %incdec.ptr, align 4
   ret i64 *%incdec.ptr
@@ -210,10 +237,11 @@ define i64* @pre64(i64* %ptr, i64 %spacing) {
 
 define i64* @pre64idxpos256(i64* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pre64idxpos256:
-; CHECK:      add x8, x0, #256
-; CHECK-NEXT: str x1, [x0, #256]
-; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    str x1, [x0, #256]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32
   store i64 %spacing, i64* %incdec.ptr, align 4
   ret i64 *%incdec.ptr
@@ -221,8 +249,9 @@ define i64* @pre64idxpos256(i64* %ptr, i64 %spacing) {
 
 define i64* @pre64idxneg256(i64* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pre64idxneg256:
-; CHECK:      str x1, [x0, #-256]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32
   store i64 %spacing, i64* %incdec.ptr, align 4
   ret i64 *%incdec.ptr
@@ -230,8 +259,9 @@ define i64* @pre64idxneg256(i64* %ptr, i64 %spacing) {
 
 define i32* @pre32(i32* %ptr, i32 %spacing) {
 ; CHECK-LABEL: pre32:
-; CHECK:      str w1, [x0, #8]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #8]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2
   store i32 %spacing, i32* %incdec.ptr, align 4
   ret i32 *%incdec.ptr
@@ -239,10 +269,11 @@ define i32* @pre32(i32* %ptr, i32 %spacing) {
 
 define i32* @pre32idxpos256(i32* %ptr, i32 %spacing) {
 ; CHECK-LABEL: pre32idxpos256:
-; CHECK:      add x8, x0, #256
-; CHECK-NEXT: str w1, [x0, #256]
-; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    str w1, [x0, #256]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64
   store i32 %spacing, i32* %incdec.ptr, align 4
   ret i32 *%incdec.ptr
@@ -250,8 +281,9 @@ define i32* @pre32idxpos256(i32* %ptr, i32 %spacing) {
 
 define i32* @pre32idxneg256(i32* %ptr, i32 %spacing) {
 ; CHECK-LABEL: pre32idxneg256:
-; CHECK:      str w1, [x0, #-256]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64
   store i32 %spacing, i32* %incdec.ptr, align 4
   ret i32 *%incdec.ptr
@@ -259,8 +291,9 @@ define i32* @pre32idxneg256(i32* %ptr, i32 %spacing) {
 
 define i16* @pre16(i16* %ptr, i16 %spacing) {
 ; CHECK-LABEL: pre16:
-; CHECK:      strh w1, [x0, #4]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #4]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2
   store i16 %spacing, i16* %incdec.ptr, align 4
   ret i16 *%incdec.ptr
@@ -268,10 +301,11 @@ define i16* @pre16(i16* %ptr, i16 %spacing) {
 
 define i16* @pre16idxpos256(i16* %ptr, i16 %spacing) {
 ; CHECK-LABEL: pre16idxpos256:
-; CHECK:      add x8, x0, #256
-; CHECK-NEXT: strh w1, [x0, #256]
-; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    strh w1, [x0, #256]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128
   store i16 %spacing, i16* %incdec.ptr, align 4
   ret i16 *%incdec.ptr
@@ -279,8 +313,9 @@ define i16* @pre16idxpos256(i16* %ptr, i16 %spacing) {
 
 define i16* @pre16idxneg256(i16* %ptr, i16 %spacing) {
 ; CHECK-LABEL: pre16idxneg256:
-; CHECK:      strh w1, [x0, #-256]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128
   store i16 %spacing, i16* %incdec.ptr, align 4
   ret i16 *%incdec.ptr
@@ -288,8 +323,9 @@ define i16* @pre16idxneg256(i16* %ptr, i16 %spacing) {
 
 define i8* @pre8(i8* %ptr, i8 %spacing) {
 ; CHECK-LABEL: pre8:
-; CHECK:      strb w1, [x0, #2]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #2]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2
   store i8 %spacing, i8* %incdec.ptr, align 4
   ret i8 *%incdec.ptr
@@ -297,10 +333,11 @@ define i8* @pre8(i8* %ptr, i8 %spacing) {
 
 define i8* @pre8idxpos256(i8* %ptr, i8 %spacing) {
 ; CHECK-LABEL: pre8idxpos256:
-; CHECK:      add x8, x0, #256
-; CHECK-NEXT: strb w1, [x0, #256]
-; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    add x8, x0, #256
+; CHECK-NEXT:    strb w1, [x0, #256]
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256
   store i8 %spacing, i8* %incdec.ptr, align 4
   ret i8 *%incdec.ptr
@@ -308,8 +345,9 @@ define i8* @pre8idxpos256(i8* %ptr, i8 %spacing) {
 
 define i8* @pre8idxneg256(i8* %ptr, i8 %spacing) {
 ; CHECK-LABEL: pre8idxneg256:
-; CHECK:      strb w1, [x0, #-256]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256
   store i8 %spacing, i8* %incdec.ptr, align 4
   ret i8 *%incdec.ptr
@@ -317,8 +355,9 @@ define i8* @pre8idxneg256(i8* %ptr, i8 %spacing) {
 
 define i32* @pretrunc64to32(i32* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to32:
-; CHECK:      str w1, [x0, #8]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #8]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2
   %trunc = trunc i64 %spacing to i32
   store i32 %trunc, i32* %incdec.ptr, align 4
@@ -327,8 +366,9 @@ define i32* @pretrunc64to32(i32* %ptr, i64 %spacing) {
 
 define i16* @pretrunc64to16(i16* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to16:
-; CHECK:      strh w1, [x0, #4]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #4]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2
   %trunc = trunc i64 %spacing to i16
   store i16 %trunc, i16* %incdec.ptr, align 4
@@ -337,8 +377,9 @@ define i16* @pretrunc64to16(i16* %ptr, i64 %spacing) {
 
 define i8* @pretrunc64to8(i8* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to8:
-; CHECK:      strb w1, [x0, #2]!
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #2]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2
   %trunc = trunc i64 %spacing to i8
   store i8 %trunc, i8* %incdec.ptr, align 4
@@ -350,9 +391,10 @@ define i8* @pretrunc64to8(i8* %ptr, i64 %spacing) {
 ;-----
 define double* @preidxf64(double* %src, double* %out) {
 ; CHECK-LABEL: preidxf64:
-; CHECK: ldr     d0, [x0, #8]!
-; CHECK: str     d0, [x1]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #8]!
+; CHECK-NEXT:    str d0, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds double, double* %src, i64 1
   %tmp = load double, double* %ptr, align 4
   store double %tmp, double* %out, align 4
@@ -361,9 +403,10 @@ define double* @preidxf64(double* %src, double* %out) {
 
 define float* @preidxf32(float* %src, float* %out) {
 ; CHECK-LABEL: preidxf32:
-; CHECK: ldr     s0, [x0, #4]!
-; CHECK: str     s0, [x1]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, #4]!
+; CHECK-NEXT:    str s0, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds float, float* %src, i64 1
   %tmp = load float, float* %ptr, align 4
   store float %tmp, float* %out, align 4
@@ -372,9 +415,10 @@ define float* @preidxf32(float* %src, float* %out) {
 
 define half* @preidxf16(half* %src, half* %out) {
 ; CHECK-LABEL: preidxf16:
-; CHECK: ldr     h0, [x0, #2]!
-; CHECK: str     h0, [x1]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, #2]!
+; CHECK-NEXT:    str h0, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds half, half* %src, i64 1
   %tmp = load half, half* %ptr, align 2
   store half %tmp, half* %out, align 2
@@ -383,9 +427,10 @@ define half* @preidxf16(half* %src, half* %out) {
 
 define i64* @preidx64(i64* %src, i64* %out) {
 ; CHECK-LABEL: preidx64:
-; CHECK: ldr     x[[REG:[0-9]+]], [x0, #8]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr x8, [x0, #8]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i64, i64* %src, i64 1
   %tmp = load i64, i64* %ptr, align 4
   store i64 %tmp, i64* %out, align 4
@@ -393,9 +438,11 @@ define i64* @preidx64(i64* %src, i64* %out) {
 }
 
 define i32* @preidx32(i32* %src, i32* %out) {
-; CHECK: ldr     w[[REG:[0-9]+]], [x0, #4]!
-; CHECK: str     w[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr w8, [x0, #4]!
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i32, i32* %src, i64 1
   %tmp = load i32, i32* %ptr, align 4
   store i32 %tmp, i32* %out, align 4
@@ -403,9 +450,11 @@ define i32* @preidx32(i32* %src, i32* %out) {
 }
 
 define i16* @preidx16zext32(i16* %src, i32* %out) {
-; CHECK: ldrh    w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str     w[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx16zext32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0, #2]!
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
   %tmp = load i16, i16* %ptr, align 4
   %ext = zext i16 %tmp to i32
@@ -414,9 +463,11 @@ define i16* @preidx16zext32(i16* %src, i32* %out) {
 }
 
 define i16* @preidx16zext64(i16* %src, i64* %out) {
-; CHECK: ldrh    w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx16zext64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0, #2]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
   %tmp = load i16, i16* %ptr, align 4
   %ext = zext i16 %tmp to i64
@@ -425,9 +476,11 @@ define i16* @preidx16zext64(i16* %src, i64* %out) {
 }
 
 define i8* @preidx8zext32(i8* %src, i32* %out) {
-; CHECK: ldrb    w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str     w[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx8zext32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0, #1]!
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
   %tmp = load i8, i8* %ptr, align 4
   %ext = zext i8 %tmp to i32
@@ -436,9 +489,11 @@ define i8* @preidx8zext32(i8* %src, i32* %out) {
 }
 
 define i8* @preidx8zext64(i8* %src, i64* %out) {
-; CHECK: ldrb    w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx8zext64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0, #1]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
   %tmp = load i8, i8* %ptr, align 4
   %ext = zext i8 %tmp to i64
@@ -447,9 +502,11 @@ define i8* @preidx8zext64(i8* %src, i64* %out) {
 }
 
 define i32* @preidx32sext64(i32* %src, i64* %out) {
-; CHECK: ldrsw   x[[REG:[0-9]+]], [x0, #4]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx32sext64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0, #4]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i32, i32* %src, i64 1
   %tmp = load i32, i32* %ptr, align 4
   %ext = sext i32 %tmp to i64
@@ -458,9 +515,11 @@ define i32* @preidx32sext64(i32* %src, i64* %out) {
 }
 
 define i16* @preidx16sext32(i16* %src, i32* %out) {
-; CHECK: ldrsh   w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str     w[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx16sext32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrsh w8, [x0, #2]!
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
   %tmp = load i16, i16* %ptr, align 4
   %ext = sext i16 %tmp to i32
@@ -469,9 +528,11 @@ define i16* @preidx16sext32(i16* %src, i32* %out) {
 }
 
 define i16* @preidx16sext64(i16* %src, i64* %out) {
-; CHECK: ldrsh   x[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx16sext64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrsh x8, [x0, #2]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
   %tmp = load i16, i16* %ptr, align 4
   %ext = sext i16 %tmp to i64
@@ -480,9 +541,11 @@ define i16* @preidx16sext64(i16* %src, i64* %out) {
 }
 
 define i8* @preidx8sext32(i8* %src, i32* %out) {
-; CHECK: ldrsb   w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str     w[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx8sext32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrsb w8, [x0, #1]!
+; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
   %tmp = load i8, i8* %ptr, align 4
   %ext = sext i8 %tmp to i32
@@ -491,9 +554,11 @@ define i8* @preidx8sext32(i8* %src, i32* %out) {
 }
 
 define i8* @preidx8sext64(i8* %src, i64* %out) {
-; CHECK: ldrsb   x[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str     x[[REG]], [x1]
-; CHECK: ret
+; CHECK-LABEL: preidx8sext64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrsb x8, [x0, #1]!
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
   %tmp = load i8, i8* %ptr, align 4
   %ext = sext i8 %tmp to i64
@@ -504,8 +569,19 @@ define i8* @preidx8sext64(i8* %src, i64* %out) {
 ; This test checks if illegal post-index is generated
 
 define i64* @postidx_clobber(i64* %addr) nounwind noinline ssp {
-; CHECK-LABEL: postidx_clobber:
-; CHECK-NOT: str     x0, [x0], #8
+; CHECK64-LABEL: postidx_clobber:
+; CHECK64:       ; %bb.0:
+; CHECK64-NEXT:    mov x8, x0
+; CHECK64-NEXT:    str x0, [x8], #8
+; CHECK64-NEXT:    mov x0, x8
+; CHECK64-NEXT:    ret
+;
+; CHECK32-LABEL: postidx_clobber:
+; CHECK32:       ; %bb.0:
+; CHECK32-NEXT:    add w8, w0, #8
+; CHECK32-NEXT:    str w0, [x0]
+; CHECK32-NEXT:    mov x0, x8
+; CHECK32-NEXT:    ret
 ; ret
  %paddr = bitcast i64* %addr to i64**
  store i64* %addr, i64** %paddr

diff  --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index b3381b69a3a82..d4aaa9c1eecaf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -1,10 +1,15 @@
-; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
 
 @ptr = global i8* null
 
 define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
 ; CHECK-LABEL: test_v8i8_pre_load:
-; CHECK: ldr d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
   %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
@@ -13,7 +18,11 @@ define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
 
 define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
 ; CHECK-LABEL: test_v8i8_post_load:
-; CHECK: ldr d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0], #40
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
   %val = load <8 x i8>, <8 x i8>* %addr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
@@ -22,7 +31,11 @@ define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
 
 define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
 ; CHECK-LABEL: test_v8i8_pre_store:
-; CHECK: str d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
   store <8 x i8> %in, <8 x i8>* %newaddr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
@@ -31,7 +44,11 @@ define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
 
 define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
 ; CHECK-LABEL: test_v8i8_post_store:
-; CHECK: str d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0], #40
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
   store <8 x i8> %in, <8 x i8>* %addr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
@@ -40,7 +57,11 @@ define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
 
 define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
 ; CHECK-LABEL: test_v4i16_pre_load:
-; CHECK: ldr d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
   %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
@@ -49,7 +70,11 @@ define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
 
 define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
 ; CHECK-LABEL: test_v4i16_post_load:
-; CHECK: ldr d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0], #40
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
   %val = load <4 x i16>, <4 x i16>* %addr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
@@ -58,7 +83,11 @@ define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
 
 define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
 ; CHECK-LABEL: test_v4i16_pre_store:
-; CHECK: str d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
   store <4 x i16> %in, <4 x i16>* %newaddr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
@@ -67,7 +96,11 @@ define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
 
 define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
 ; CHECK-LABEL: test_v4i16_post_store:
-; CHECK: str d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0], #40
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
   store <4 x i16> %in, <4 x i16>* %addr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
@@ -76,7 +109,11 @@ define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
 
 define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
 ; CHECK-LABEL: test_v2i32_pre_load:
-; CHECK: ldr d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
   %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
@@ -85,7 +122,11 @@ define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
 
 define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
 ; CHECK-LABEL: test_v2i32_post_load:
-; CHECK: ldr d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0], #40
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
   %val = load <2 x i32>, <2 x i32>* %addr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
@@ -94,7 +135,11 @@ define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
 
 define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
 ; CHECK-LABEL: test_v2i32_pre_store:
-; CHECK: str d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
   store <2 x i32> %in, <2 x i32>* %newaddr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
@@ -103,7 +148,11 @@ define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
 
 define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
 ; CHECK-LABEL: test_v2i32_post_store:
-; CHECK: str d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0], #40
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
   store <2 x i32> %in, <2 x i32>* %addr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
@@ -112,7 +161,11 @@ define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
 
 define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
 ; CHECK-LABEL: test_v2f32_pre_load:
-; CHECK: ldr d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
   %val = load <2 x float>, <2 x float>* %newaddr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
@@ -121,7 +174,11 @@ define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
 
 define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
 ; CHECK-LABEL: test_v2f32_post_load:
-; CHECK: ldr d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0], #40
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
   %val = load <2 x float>, <2 x float>* %addr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
@@ -130,7 +187,11 @@ define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
 
 define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
 ; CHECK-LABEL: test_v2f32_pre_store:
-; CHECK: str d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
   store <2 x float> %in, <2 x float>* %newaddr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
@@ -139,7 +200,11 @@ define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
 
 define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
 ; CHECK-LABEL: test_v2f32_post_store:
-; CHECK: str d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0], #40
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
   store <2 x float> %in, <2 x float>* %addr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
@@ -148,7 +213,11 @@ define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
 
 define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
 ; CHECK-LABEL: test_v1i64_pre_load:
-; CHECK: ldr d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
   %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
@@ -157,7 +226,11 @@ define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
 
 define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
 ; CHECK-LABEL: test_v1i64_post_load:
-; CHECK: ldr d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0], #40
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
   %val = load <1 x i64>, <1 x i64>* %addr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
@@ -166,7 +239,11 @@ define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
 
 define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
 ; CHECK-LABEL: test_v1i64_pre_store:
-; CHECK: str d0, [x0, #40]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
   store <1 x i64> %in, <1 x i64>* %newaddr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
@@ -175,7 +252,11 @@ define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
 
 define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
 ; CHECK-LABEL: test_v1i64_post_store:
-; CHECK: str d0, [x0], #40
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0], #40
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
   store <1 x i64> %in, <1 x i64>* %addr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
@@ -184,7 +265,11 @@ define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
 
 define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
 ; CHECK-LABEL: test_v16i8_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
   %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
@@ -193,7 +278,11 @@ define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
 
 define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
 ; CHECK-LABEL: test_v16i8_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
   %val = load <16 x i8>, <16 x i8>* %addr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
@@ -202,7 +291,11 @@ define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
 
 define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
 ; CHECK-LABEL: test_v16i8_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
   store <16 x i8> %in, <16 x i8>* %newaddr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
@@ -211,7 +304,11 @@ define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
 
 define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
 ; CHECK-LABEL: test_v16i8_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
   store <16 x i8> %in, <16 x i8>* %addr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
@@ -220,7 +317,11 @@ define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
 
 define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
 ; CHECK-LABEL: test_v8i16_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
   %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
@@ -229,7 +330,11 @@ define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
 
 define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
 ; CHECK-LABEL: test_v8i16_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
   %val = load <8 x i16>, <8 x i16>* %addr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
@@ -238,7 +343,11 @@ define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
 
 define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
 ; CHECK-LABEL: test_v8i16_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
   store <8 x i16> %in, <8 x i16>* %newaddr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
@@ -247,7 +356,11 @@ define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
 
 define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
 ; CHECK-LABEL: test_v8i16_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
   store <8 x i16> %in, <8 x i16>* %addr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
@@ -256,7 +369,11 @@ define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
 
 define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
 ; CHECK-LABEL: test_v4i32_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
   %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
@@ -265,7 +382,11 @@ define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
 
 define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
 ; CHECK-LABEL: test_v4i32_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
   %val = load <4 x i32>, <4 x i32>* %addr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
@@ -274,7 +395,11 @@ define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
 
 define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
 ; CHECK-LABEL: test_v4i32_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
   store <4 x i32> %in, <4 x i32>* %newaddr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
@@ -283,7 +408,11 @@ define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
 
 define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
 ; CHECK-LABEL: test_v4i32_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
   store <4 x i32> %in, <4 x i32>* %addr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
@@ -293,7 +422,11 @@ define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
 
 define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
 ; CHECK-LABEL: test_v4f32_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
   %val = load <4 x float>, <4 x float>* %newaddr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
@@ -302,7 +435,11 @@ define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
 
 define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
 ; CHECK-LABEL: test_v4f32_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
   %val = load <4 x float>, <4 x float>* %addr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
@@ -311,7 +448,11 @@ define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
 
 define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
 ; CHECK-LABEL: test_v4f32_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
   store <4 x float> %in, <4 x float>* %newaddr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
@@ -320,7 +461,11 @@ define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
 
 define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
 ; CHECK-LABEL: test_v4f32_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
   store <4 x float> %in, <4 x float>* %addr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
@@ -330,7 +475,11 @@ define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
 
 define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
 ; CHECK-LABEL: test_v2i64_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
   %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
@@ -339,7 +488,11 @@ define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
 
 define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
 ; CHECK-LABEL: test_v2i64_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
   %val = load <2 x i64>, <2 x i64>* %addr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
@@ -348,7 +501,11 @@ define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
 
 define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
 ; CHECK-LABEL: test_v2i64_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
   store <2 x i64> %in, <2 x i64>* %newaddr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
@@ -357,7 +514,11 @@ define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
 
 define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
 ; CHECK-LABEL: test_v2i64_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
   store <2 x i64> %in, <2 x i64>* %addr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
@@ -367,7 +528,11 @@ define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
 
 define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
 ; CHECK-LABEL: test_v2f64_pre_load:
-; CHECK: ldr q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
   %val = load <2 x double>, <2 x double>* %newaddr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
@@ -376,7 +541,11 @@ define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
 
 define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
 ; CHECK-LABEL: test_v2f64_post_load:
-; CHECK: ldr q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0], #80
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
   %val = load <2 x double>, <2 x double>* %addr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
@@ -385,7 +554,11 @@ define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
 
 define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
 ; CHECK-LABEL: test_v2f64_pre_store:
-; CHECK: str q0, [x0, #80]!
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
   store <2 x double> %in, <2 x double>* %newaddr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
@@ -394,7 +567,11 @@ define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
 
 define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
 ; CHECK-LABEL: test_v2f64_post_store:
-; CHECK: str q0, [x0], #80
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0], #80
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
   store <2 x double> %in, <2 x double>* %addr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
@@ -403,7 +580,9 @@ define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
 
 define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
 ; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
-; CHECK: st1.b { v0 }[3], [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.b { v0 }[3], [x0], #1
+; CHECK-NEXT:    ret
   %elt = extractelement <16 x i8> %in, i32 3
   store i8 %elt, i8* %addr
 
@@ -413,8 +592,10 @@ define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
 
 define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
 ; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #2
-; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    st1.b { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <16 x i8> %in, i32 3
   store i8 %elt, i8* %addr
 
@@ -425,7 +606,9 @@ define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
 
 define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
 ; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
-; CHECK: st1.h { v0 }[3], [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.h { v0 }[3], [x0], #2
+; CHECK-NEXT:    ret
   %elt = extractelement <8 x i16> %in, i32 3
   store i16 %elt, i16* %addr
 
@@ -435,8 +618,10 @@ define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
 
 define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
 ; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #4
-; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    st1.h { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <8 x i16> %in, i32 3
   store i16 %elt, i16* %addr
 
@@ -446,7 +631,9 @@ define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
 
 define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
 ; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
-; CHECK: st1.s { v0 }[3], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.s { v0 }[3], [x0], #4
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x i32> %in, i32 3
   store i32 %elt, i32* %addr
 
@@ -456,8 +643,10 @@ define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
 
 define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
 ; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #8
-; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    st1.s { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x i32> %in, i32 3
   store i32 %elt, i32* %addr
 
@@ -467,7 +656,9 @@ define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
 
 define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
 ; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
-; CHECK: st1.s { v0 }[3], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.s { v0 }[3], [x0], #4
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %in, i32 3
   store float %elt, float* %addr
 
@@ -477,8 +668,10 @@ define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
 
 define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
 ; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #8
-; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    st1.s { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %in, i32 3
   store float %elt, float* %addr
 
@@ -488,7 +681,9 @@ define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
 
 define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
 ; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
-; CHECK: st1.d { v0 }[1], [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.d { v0 }[1], [x0], #8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x i64> %in, i64 1
   store i64 %elt, i64* %addr
 
@@ -498,8 +693,10 @@ define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
 
 define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
 ; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #16
-; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    st1.d { v0 }[1], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x i64> %in, i64 1
   store i64 %elt, i64* %addr
 
@@ -509,7 +706,9 @@ define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
 
 define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
 ; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
-; CHECK: st1.d { v0 }[1], [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    st1.d { v0 }[1], [x0], #8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %in, i32 1
   store double %elt, double* %addr
 
@@ -519,8 +718,10 @@ define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
 
 define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
 ; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #16
-; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    st1.d { v0 }[1], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %in, i32 1
   store double %elt, double* %addr
 
@@ -530,7 +731,10 @@ define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
 
 define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
 ; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
-; CHECK: st1.b { v0 }[3], [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.b { v0 }[3], [x0], #1
+; CHECK-NEXT:    ret
   %elt = extractelement <8 x i8> %in, i32 3
   store i8 %elt, i8* %addr
 
@@ -540,8 +744,11 @@ define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
 
 define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
 ; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #2
-; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.b { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <8 x i8> %in, i32 3
   store i8 %elt, i8* %addr
 
@@ -551,7 +758,10 @@ define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
 
 define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
 ; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
-; CHECK: st1.h { v0 }[3], [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.h { v0 }[3], [x0], #2
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x i16> %in, i32 3
   store i16 %elt, i16* %addr
 
@@ -561,8 +771,11 @@ define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
 
 define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
 ; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #4
-; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.h { v0 }[3], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <4 x i16> %in, i32 3
   store i16 %elt, i16* %addr
 
@@ -572,7 +785,10 @@ define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
 
 define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
 ; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
-; CHECK: st1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x i32> %in, i32 1
   store i32 %elt, i32* %addr
 
@@ -582,8 +798,11 @@ define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
 
 define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
 ; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #8
-; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x i32> %in, i32 1
   store i32 %elt, i32* %addr
 
@@ -593,7 +812,10 @@ define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
 
 define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
 ; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
-; CHECK: st1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %in, i32 1
   store float %elt, float* %addr
 
@@ -603,8 +825,11 @@ define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
 
 define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
 ; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
-; CHECK: mov w[[OFFSET:[0-9]+]], #8
-; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    st1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %in, i32 1
   store float %elt, float* %addr
 
@@ -613,8 +838,11 @@ define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
 }
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld2:
-;CHECK: ld2.16b { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v16i8_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.16b { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   store i8* %tmp, i8** %ptr
@@ -622,8 +850,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
 }
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld2:
-;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.16b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -634,8 +865,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
 
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld2:
-;CHECK: ld2.8b { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v8i8_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.8b { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 16
   store i8* %tmp, i8** %ptr
@@ -643,8 +877,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
 }
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld2:
-;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.8b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -655,8 +892,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
 
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld2:
-;CHECK: ld2.8h { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v8i16_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.8h { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   store i16* %tmp, i16** %ptr
@@ -664,8 +904,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
 }
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld2:
-;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld2.8h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -676,8 +920,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
 
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld2:
-;CHECK: ld2.4h { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v4i16_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.4h { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 8
   store i16* %tmp, i16** %ptr
@@ -685,8 +932,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
 }
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld2:
-;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld2.4h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -697,8 +948,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
 
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld2:
-;CHECK: ld2.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4i32_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   store i32* %tmp, i32** %ptr
@@ -706,8 +960,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
 }
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld2:
-;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -718,8 +976,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
 
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld2:
-;CHECK: ld2.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -727,8 +988,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
 }
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld2:
-;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -739,8 +1004,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
 
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld2:
-;CHECK: ld2.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -748,8 +1016,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
 }
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld2:
-;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -760,8 +1032,11 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
 
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld2:
-;CHECK: ld1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -769,8 +1044,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
 }
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld2:
-;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -781,8 +1060,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
 
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld2:
-;CHECK: ld2.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4f32_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   store float* %tmp, float** %ptr
@@ -790,8 +1072,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float**
 }
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld2:
-;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -802,8 +1088,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
 
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld2:
-;CHECK: ld2.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -811,8 +1100,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float**
 }
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld2:
-;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -823,8 +1116,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
 
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld2:
-;CHECK: ld2.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -832,8 +1128,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, doubl
 }
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld2:
-;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -844,8 +1144,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double
 
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld2:
-;CHECK: ld1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -853,8 +1156,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, doubl
 }
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld2:
-;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -865,8 +1172,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld3:
-;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v16i8_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.16b { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 48
   store i8* %tmp, i8** %ptr
@@ -874,8 +1184,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8**
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld3:
-;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.16b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -886,8 +1199,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld3:
-;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v8i8_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.8b { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 24
   store i8* %tmp, i8** %ptr
@@ -895,8 +1211,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %pt
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld3:
-;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.8b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -907,8 +1226,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld3:
-;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v8i16_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.8h { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 24
   store i16* %tmp, i16** %ptr
@@ -916,8 +1238,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld3:
-;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld3.8h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -928,8 +1254,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld3:
-;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v4i16_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.4h { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 12
   store i16* %tmp, i16** %ptr
@@ -937,8 +1266,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld3:
-;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld3.4h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -949,8 +1282,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld3:
-;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4i32_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 12
   store i32* %tmp, i32** %ptr
@@ -958,8 +1294,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld3:
-;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -970,8 +1310,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld3:
-;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2i32_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 6
   store i32* %tmp, i32** %ptr
@@ -979,8 +1322,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld3:
-;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -991,8 +1338,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld3:
-;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2i64_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 6
   store i64* %tmp, i64** %ptr
@@ -1000,8 +1350,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld3:
-;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1012,8 +1366,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -1021,8 +1378,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1033,8 +1394,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i
 
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld3:
-;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4f32_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 12
   store float* %tmp, float** %ptr
@@ -1042,8 +1406,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float*
 }
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld3:
-;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1054,8 +1422,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p
 
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld3:
-;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2f32_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 6
   store float* %tmp, float** %ptr
@@ -1063,8 +1434,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float*
 }
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld3:
-;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1075,8 +1450,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p
 
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld3:
-;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2f64_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 6
   store double* %tmp, double** %ptr
@@ -1084,8 +1462,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(dou
 }
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld3:
-;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1096,8 +1478,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f6
 
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -1105,8 +1490,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(dou
 }
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1117,8 +1506,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f6
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld4:
-;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v16i8_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 64
   store i8* %tmp, i8** %ptr
@@ -1126,8 +1518,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld4:
-;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1138,8 +1533,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v1
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld4:
-;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v8i8_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   store i8* %tmp, i8** %ptr
@@ -1147,8 +1545,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld4:
-;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1159,8 +1560,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld4:
-;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v8i16_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 32
   store i16* %tmp, i16** %ptr
@@ -1168,8 +1572,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld4:
-;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1180,8 +1588,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld4:
-;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v4i16_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   store i16* %tmp, i16** %ptr
@@ -1189,8 +1600,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld4:
-;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1201,8 +1616,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld4:
-;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4i32_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 16
   store i32* %tmp, i32** %ptr
@@ -1210,8 +1628,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld4:
-;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1222,8 +1644,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld4:
-;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2i32_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   store i32* %tmp, i32** %ptr
@@ -1231,8 +1656,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld4:
-;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1243,8 +1672,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld4:
-;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2i64_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 8
   store i64* %tmp, i64** %ptr
@@ -1252,8 +1684,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld4:
-;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1264,8 +1700,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -1273,8 +1712,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1285,8 +1728,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1
 
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld4:
-;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4f32_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 16
   store float* %tmp, float** %ptr
@@ -1294,8 +1740,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 }
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld4:
-;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1306,8 +1756,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
 
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld4:
-;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2f32_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   store float* %tmp, float** %ptr
@@ -1315,8 +1768,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 }
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld4:
-;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1327,8 +1784,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld4:
-;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2f64_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 8
   store double* %tmp, double** %ptr
@@ -1336,8 +1796,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 }
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld4:
-;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1348,8 +1812,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
 
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -1357,8 +1824,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 }
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1368,8 +1839,11 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
-;CHECK: ld1.16b { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v16i8_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   store i8* %tmp, i8** %ptr
@@ -1377,8 +1851,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
 }
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
-;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1389,8 +1866,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
 
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
-;CHECK: ld1.8b { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v8i8_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 16
   store i8* %tmp, i8** %ptr
@@ -1398,8 +1878,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
 }
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
-;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1410,8 +1893,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
 
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
-;CHECK: ld1.8h { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v8i16_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   store i16* %tmp, i16** %ptr
@@ -1419,8 +1905,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr)
 }
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
-;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.8h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1431,8 +1921,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
 
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
-;CHECK: ld1.4h { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v4i16_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 8
   store i16* %tmp, i16** %ptr
@@ -1440,8 +1933,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr)
 }
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
-;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.4h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1452,8 +1949,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
 
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
-;CHECK: ld1.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4i32_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   store i32* %tmp, i32** %ptr
@@ -1461,8 +1961,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr)
 }
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
-;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1473,8 +1977,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
 
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
-;CHECK: ld1.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -1482,8 +1989,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr)
 }
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
-;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1494,8 +2005,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
 
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
-;CHECK: ld1.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -1503,8 +2017,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr)
 }
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
-;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1515,8 +2033,11 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
 
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
-;CHECK: ld1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -1524,8 +2045,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr)
 }
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
-;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1536,8 +2061,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
 
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
-;CHECK: ld1.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4f32_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   store float* %tmp, float** %ptr
@@ -1545,8 +2073,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float*
 }
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
-;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1557,8 +2089,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*
 
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
-;CHECK: ld1.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -1566,8 +2101,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float*
 }
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
-;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1578,8 +2117,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*
 
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
-;CHECK: ld1.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -1587,8 +2129,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, dou
 }
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
-;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1599,8 +2145,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(doub
 
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
-;CHECK: ld1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -1608,8 +2157,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, dou
 }
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
-;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1620,8 +2173,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(doub
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
-;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v16i8_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 48
   store i8* %tmp, i8** %ptr
@@ -1629,8 +2185,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
-;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1641,8 +2200,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
-;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v8i8_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 24
   store i8* %tmp, i8** %ptr
@@ -1650,8 +2212,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
-;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1662,8 +2227,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
-;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v8i16_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 24
   store i16* %tmp, i16** %ptr
@@ -1671,8 +2239,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
-;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.8h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1683,8 +2255,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
-;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v4i16_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 12
   store i16* %tmp, i16** %ptr
@@ -1692,8 +2267,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
-;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.4h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1704,8 +2283,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
-;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4i32_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 12
   store i32* %tmp, i32** %ptr
@@ -1713,8 +2295,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
-;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1725,8 +2311,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
-;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2i32_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 6
   store i32* %tmp, i32** %ptr
@@ -1734,8 +2323,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
-;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1746,8 +2339,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
-;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2i64_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 6
   store i64* %tmp, i64** %ptr
@@ -1755,8 +2351,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
-;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1767,8 +2367,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -1776,8 +2379,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -1788,8 +2395,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64
 
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
-;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4f32_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 12
   store float* %tmp, float** %ptr
@@ -1797,8 +2407,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(floa
 }
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
-;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1809,8 +2423,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32
 
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
-;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2f32_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 6
   store float* %tmp, float** %ptr
@@ -1818,8 +2435,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(floa
 }
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
-;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -1830,8 +2451,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32
 
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
-;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2f64_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 6
   store double* %tmp, double** %ptr
@@ -1839,8 +2463,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(d
 }
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
-;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1851,8 +2479,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2
 
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -1860,8 +2491,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(d
 }
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
-;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -1872,8 +2507,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
-;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v16i8_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 64
   store i8* %tmp, i8** %ptr
@@ -1881,8 +2519,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
-;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1893,8 +2534,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
-;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
-;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v8i8_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   store i8* %tmp, i8** %ptr
@@ -1902,8 +2546,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8*
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
-;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -1914,8 +2561,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
-;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v8i16_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 32
   store i16* %tmp, i16** %ptr
@@ -1923,8 +2573,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
-;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1935,8 +2589,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
-;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
-;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v4i16_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   store i16* %tmp, i16** %ptr
@@ -1944,8 +2601,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
-;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -1956,8 +2617,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
-;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4i32_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 16
   store i32* %tmp, i32** %ptr
@@ -1965,8 +2629,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
-;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1977,8 +2645,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
-;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
-;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2i32_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   store i32* %tmp, i32** %ptr
@@ -1986,8 +2657,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
-;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -1998,8 +2673,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
-;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2i64_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 8
   store i64* %tmp, i64** %ptr
@@ -2007,8 +2685,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
-;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2019,8 +2701,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
-;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -2028,8 +2713,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2040,8 +2729,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.
 
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
-;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4f32_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 16
   store float* %tmp, float** %ptr
@@ -2049,8 +2741,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 }
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
-;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2061,8 +2757,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
 
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
-;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
-;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2f32_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   store float* %tmp, float** %ptr
@@ -2070,8 +2769,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 }
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
-;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2082,8 +2785,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
-;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2f64_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 8
   store double* %tmp, double** %ptr
@@ -2091,8 +2797,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 }
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
-;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2103,8 +2813,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
 
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
-;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -2112,8 +2825,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 }
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
-;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
-;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2124,8 +2841,11 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld2r:
-;CHECK: ld2r.16b { v0, v1 }, [x0], #2
+; CHECK-LABEL: test_v16i8_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.16b { v0, v1 }, [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   store i8* %tmp, i8** %ptr
@@ -2133,8 +2853,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nou
 }
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld2r:
-;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.16b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2145,8 +2868,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwin
 
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld2r:
-;CHECK: ld2r.8b { v0, v1 }, [x0], #2
+; CHECK-LABEL: test_v8i8_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.8b { v0, v1 }, [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   store i8* %tmp, i8** %ptr
@@ -2154,8 +2880,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwi
 }
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld2r:
-;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.8b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2166,8 +2895,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind r
 
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld2r:
-;CHECK: ld2r.8h { v0, v1 }, [x0], #4
+; CHECK-LABEL: test_v8i16_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.8h { v0, v1 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   store i16* %tmp, i16** %ptr
@@ -2175,8 +2907,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) n
 }
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld2r:
-;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld2r.8h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2187,8 +2923,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounw
 
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld2r:
-;CHECK: ld2r.4h { v0, v1 }, [x0], #4
+; CHECK-LABEL: test_v4i16_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.4h { v0, v1 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   store i16* %tmp, i16** %ptr
@@ -2196,8 +2935,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) n
 }
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld2r:
-;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld2r.4h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2208,8 +2951,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounw
 
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld2r:
-;CHECK: ld2r.4s { v0, v1 }, [x0], #8
+; CHECK-LABEL: test_v4i32_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   store i32* %tmp, i32** %ptr
@@ -2217,8 +2963,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) n
 }
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld2r:
-;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2228,8 +2978,11 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i
 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld2r:
-;CHECK: ld2r.2s { v0, v1 }, [x0], #8
+; CHECK-LABEL: test_v2i32_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   store i32* %tmp, i32** %ptr
@@ -2237,8 +2990,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) n
 }
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld2r:
-;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2249,8 +3006,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounw
 
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld2r:
-;CHECK: ld2r.2d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2i64_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -2258,8 +3018,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) n
 }
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld2r:
-;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2269,8 +3033,11 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i
 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld2r:
-;CHECK: ld2r.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -2278,8 +3045,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) n
 }
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld2r:
-;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2290,8 +3061,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounw
 
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld2r:
-;CHECK: ld2r.4s { v0, v1 }, [x0], #8
+; CHECK-LABEL: test_v4f32_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   store float* %tmp, float** %ptr
@@ -2299,8 +3073,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float**
 }
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld2r:
-;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2310,8 +3088,11 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float**
 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld2r:
-;CHECK: ld2r.2s { v0, v1 }, [x0], #8
+; CHECK-LABEL: test_v2f32_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   store float* %tmp, float** %ptr
@@ -2319,8 +3100,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float**
 }
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld2r:
-;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2331,8 +3116,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*)
 
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld2r:
-;CHECK: ld2r.2d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2f64_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -2340,8 +3128,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, doub
 }
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld2r:
-;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2351,8 +3143,11 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, doub
 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld2r:
-;CHECK: ld2r.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -2360,8 +3155,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, doub
 }
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld2r:
-;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld2r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2372,8 +3171,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(doubl
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld3r:
-;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
+; CHECK-LABEL: test_v16i8_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], #3
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   store i8* %tmp, i8** %ptr
@@ -2381,8 +3183,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8*
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld3r:
-;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2393,8 +3198,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld3r:
-;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
+; CHECK-LABEL: test_v8i8_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], #3
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   store i8* %tmp, i8** %ptr
@@ -2402,8 +3210,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %p
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld3r:
-;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2414,8 +3225,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*)
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld3r:
-;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
+; CHECK-LABEL: test_v8i16_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], #6
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   store i16* %tmp, i16** %ptr
@@ -2423,8 +3237,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i1
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld3r:
-;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2435,8 +3253,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld3r:
-;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
+; CHECK-LABEL: test_v4i16_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], #6
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   store i16* %tmp, i16** %ptr
@@ -2444,8 +3265,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i1
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld3r:
-;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2456,8 +3281,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld3r:
-;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
+; CHECK-LABEL: test_v4i32_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   store i32* %tmp, i32** %ptr
@@ -2465,8 +3293,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i3
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld3r:
-;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2476,8 +3308,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i3
 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld3r:
-;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
+; CHECK-LABEL: test_v2i32_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   store i32* %tmp, i32** %ptr
@@ -2485,8 +3320,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i3
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld3r:
-;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2497,8 +3336,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld3r:
-;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2i64_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -2506,8 +3348,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i6
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld3r:
-;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2517,8 +3363,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i6
 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld3r:
-;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -2526,8 +3375,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i6
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld3r:
-;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2538,8 +3391,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(
 
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld3r:
-;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
+; CHECK-LABEL: test_v4f32_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   store float* %tmp, float** %ptr
@@ -2547,8 +3403,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float
 }
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld3r:
-;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2558,8 +3418,11 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float
 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld3r:
-;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
+; CHECK-LABEL: test_v2f32_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   store float* %tmp, float** %ptr
@@ -2567,8 +3430,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float
 }
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld3r:
-;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2579,8 +3446,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.
 
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld3r:
-;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2f64_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -2588,8 +3458,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(do
 }
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld3r:
-;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2599,8 +3473,11 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(do
 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld3r:
-;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -2608,8 +3485,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(do
 }
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld3r:
-;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld3r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2620,8 +3501,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld4r:
-;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
+; CHECK-LABEL: test_v16i8_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   store i8* %tmp, i8** %ptr
@@ -2629,8 +3513,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld4r:
-;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2641,8 +3528,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld4r:
-;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
+; CHECK-LABEL: test_v8i8_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   store i8* %tmp, i8** %ptr
@@ -2650,8 +3540,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld4r:
-;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2662,8 +3555,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld4r:
-;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
+; CHECK-LABEL: test_v8i16_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   store i16* %tmp, i16** %ptr
@@ -2671,8 +3567,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld4r:
-;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2683,8 +3583,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld4r:
-;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
+; CHECK-LABEL: test_v4i16_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   store i16* %tmp, i16** %ptr
@@ -2692,8 +3595,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld4r:
-;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2704,8 +3611,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld4r:
-;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-LABEL: test_v4i32_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -2713,8 +3623,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld4r:
-;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2724,8 +3638,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(
 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld4r:
-;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -2733,8 +3650,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld4r:
-;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2745,8 +3666,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld4r:
-;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -2754,8 +3678,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld4r:
-;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2765,8 +3693,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(
 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld4r:
-;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -2774,8 +3705,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld4r:
-;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -2786,8 +3721,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v
 
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld4r:
-;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-LABEL: test_v4f32_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -2795,8 +3733,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 }
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld4r:
-;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2806,8 +3748,11 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r
 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld4r:
-;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -2815,8 +3760,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 }
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld4r:
-;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -2827,8 +3776,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld4r:
-;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -2836,8 +3788,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 }
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld4r:
-;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2847,8 +3803,11 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld4r:
-;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -2856,8 +3815,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 }
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld4r:
-;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld4r:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -2868,8 +3831,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
-;CHECK: ld2.b { v0, v1 }[0], [x0], #2
+; CHECK-LABEL: test_v16i8_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   store i8* %tmp, i8** %ptr
@@ -2877,8 +3845,13 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr,
 }
 
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
-;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2889,8 +3862,13 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>
 
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
-;CHECK: ld2.b { v0, v1 }[0], [x0], #2
+; CHECK-LABEL: test_v8i8_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   store i8* %tmp, i8** %ptr
@@ -2898,8 +3876,13 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8
 }
 
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
-;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -2910,8 +3893,13 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8
 
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
-;CHECK: ld2.h { v0, v1 }[0], [x0], #4
+; CHECK-LABEL: test_v8i16_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   store i16* %tmp, i16** %ptr
@@ -2919,8 +3907,14 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr
 }
 
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
-;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2931,8 +3925,13 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16
 
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
-;CHECK: ld2.h { v0, v1 }[0], [x0], #4
+; CHECK-LABEL: test_v4i16_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   store i16* %tmp, i16** %ptr
@@ -2940,8 +3939,14 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr
 }
 
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
-;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -2952,8 +3957,13 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16
 
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v4i32_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   store i32* %tmp, i32** %ptr
@@ -2961,8 +3971,14 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr
 }
 
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2973,8 +3989,13 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32
 
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v2i32_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   store i32* %tmp, i32** %ptr
@@ -2982,8 +4003,14 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr
 }
 
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -2994,8 +4021,13 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32
 
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v2i64_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -3003,8 +4035,14 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr
 }
 
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3015,8 +4053,13 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64
 
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 2
   store i64* %tmp, i64** %ptr
@@ -3024,8 +4067,14 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr
 }
 
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3036,8 +4085,13 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64
 
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v4f32_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   store float* %tmp, float** %ptr
@@ -3045,8 +4099,14 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, floa
 }
 
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3057,8 +4117,13 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x
 
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v2f32_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   store float* %tmp, float** %ptr
@@ -3066,8 +4131,14 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, floa
 }
 
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
-;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3078,8 +4149,13 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x
 
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v2f64_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -3087,8 +4163,14 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, d
 }
 
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3099,8 +4181,13 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2
 
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 2
   store double* %tmp, double** %ptr
@@ -3108,8 +4195,14 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, d
 }
 
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
-;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3120,8 +4213,14 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
-;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-LABEL: test_v16i8_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   store i8* %tmp, i8** %ptr
@@ -3129,8 +4228,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A,
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
-;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -3141,8 +4246,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
-;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-LABEL: test_v8i8_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   store i8* %tmp, i8** %ptr
@@ -3150,8 +4261,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8**
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
-;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -3162,8 +4279,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
-;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-LABEL: test_v8i16_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   store i16* %tmp, i16** %ptr
@@ -3171,8 +4294,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A,
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
-;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -3183,8 +4313,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
-;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-LABEL: test_v4i16_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   store i16* %tmp, i16** %ptr
@@ -3192,8 +4328,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A,
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
-;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -3204,8 +4347,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v4i32_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   store i32* %tmp, i32** %ptr
@@ -3213,8 +4362,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A,
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -3225,8 +4381,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v2i32_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   store i32* %tmp, i32** %ptr
@@ -3234,8 +4396,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A,
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -3246,8 +4415,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v2i64_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -3255,8 +4430,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A,
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3267,8 +4449,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 3
   store i64* %tmp, i64** %ptr
@@ -3276,8 +4464,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A,
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3288,8 +4483,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i
 
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v4f32_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   store float* %tmp, float** %ptr
@@ -3297,8 +4498,15 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(fl
 }
 
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3309,8 +4517,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f
 
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v2f32_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   store float* %tmp, float** %ptr
@@ -3318,8 +4532,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(fl
 }
 
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
-;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3330,8 +4551,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f
 
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v2f64_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -3339,8 +4566,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane
 }
 
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3351,8 +4585,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.
 
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 3
   store double* %tmp, double** %ptr
@@ -3360,8 +4600,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane
 }
 
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
-;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3372,8 +4619,15 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.
 
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
-;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-LABEL: test_v16i8_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   store i8* %tmp, i8** %ptr
@@ -3381,8 +4635,15 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la
 }
 
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
-;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -3393,8 +4654,15 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan
 
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
-;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-LABEL: test_v8i8_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   store i8* %tmp, i8** %ptr
@@ -3402,8 +4670,15 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8
 }
 
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
-;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   store i8* %tmp, i8** %ptr
@@ -3414,8 +4689,15 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8
 
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
-;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-LABEL: test_v8i16_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   store i16* %tmp, i16** %ptr
@@ -3423,8 +4705,16 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la
 }
 
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
-;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -3435,8 +4725,15 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan
 
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
-;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-LABEL: test_v4i16_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   store i16* %tmp, i16** %ptr
@@ -3444,8 +4741,16 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la
 }
 
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
-;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   store i16* %tmp, i16** %ptr
@@ -3456,8 +4761,15 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan
 
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v4i32_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -3465,8 +4777,16 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la
 }
 
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -3477,8 +4797,15 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan
 
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   store i32* %tmp, i32** %ptr
@@ -3486,8 +4813,16 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la
 }
 
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   store i32* %tmp, i32** %ptr
@@ -3498,8 +4833,15 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan
 
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -3507,8 +4849,16 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la
 }
 
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3519,8 +4869,15 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan
 
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i32 4
   store i64* %tmp, i64** %ptr
@@ -3528,8 +4885,16 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la
 }
 
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   store i64* %tmp, i64** %ptr
@@ -3540,8 +4905,15 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan
 
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v4f32_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -3549,8 +4921,16 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 }
 
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3561,8 +4941,15 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
 
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   store float* %tmp, float** %ptr
@@ -3570,8 +4957,16 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 }
 
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
-;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   store float* %tmp, float** %ptr
@@ -3582,8 +4977,15 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -3591,8 +4993,16 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 }
 
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3603,8 +5013,15 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
 
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i32 4
   store double* %tmp, double** %ptr
@@ -3612,8 +5029,16 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 }
 
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
-;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_ld4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   store double* %tmp, double** %ptr
@@ -3624,16 +5049,24 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 
 
 define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st2:
-;CHECK: st2.16b { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v16i8_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.16b { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st2:
-;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.16b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -3643,16 +5076,24 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
 
 
 define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st2:
-;CHECK: st2.8b { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v8i8_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.8b { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 16
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st2:
-;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.8b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -3662,16 +5103,25 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
 
 
 define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st2:
-;CHECK: st2.8h { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v8i16_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.8h { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st2:
-;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.8h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -3681,16 +5131,25 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
 
 
 define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st2:
-;CHECK: st2.4h { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v4i16_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.4h { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 8
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st2:
-;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.4h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -3700,16 +5159,25 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
 
 
 define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st2:
-;CHECK: st2.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4i32_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st2:
-;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -3719,16 +5187,25 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
 
 
 define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st2:
-;CHECK: st2.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st2:
-;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -3738,16 +5215,25 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
 
 
 define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st2:
-;CHECK: st2.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st2:
-;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -3757,16 +5243,25 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
 
 
 define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st2:
-;CHECK: st1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 2
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st2:
-;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -3776,16 +5271,25 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
 
 
 define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st2:
-;CHECK: st2.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4f32_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st2:
-;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -3795,16 +5299,25 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*
 
 
 define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st2:
-;CHECK: st2.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st2:
-;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st2.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -3814,16 +5327,25 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*
 
 
 define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st2:
-;CHECK: st2.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st2:
-;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -3833,16 +5355,25 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, doub
 
 
 define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st2:
-;CHECK: st1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 2
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st2:
-;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -3852,16 +5383,26 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, doub
 
 
 define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st3:
-;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v16i8_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.16b { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 48
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st3:
-;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.16b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -3871,16 +5412,26 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>,
 
 
 define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st3:
-;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v8i8_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.8b { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 24
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st3:
-;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.8b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -3890,16 +5441,27 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
 
 
 define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st3:
-;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v8i16_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.8h { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 24
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st3:
-;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.8h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -3909,16 +5471,27 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>,
 
 
 define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st3:
-;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v4i16_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.4h { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 12
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st3:
-;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.4h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -3928,16 +5501,27 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,
 
 
 define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st3:
-;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4i32_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 12
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st3:
-;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -3947,16 +5531,27 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,
 
 
 define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st3:
-;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2i32_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 6
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st3:
-;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -3966,16 +5561,27 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>,
 
 
 define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st3:
-;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2i64_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 6
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st3:
-;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -3985,16 +5591,27 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,
 
 
 define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 3
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4004,16 +5621,27 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,
 
 
 define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st3:
-;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4f32_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i32 12
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st3:
-;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4023,16 +5651,27 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x f
 
 
 define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st3:
-;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2f32_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i32 6
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st3:
-;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4042,16 +5681,27 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x f
 
 
 define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st3:
-;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2f64_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 6
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st3:
-;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4061,16 +5711,27 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x
 
 
 define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 3
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4080,16 +5741,28 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x
 
 
 define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st4:
-;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v16i8_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 64
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st4:
-;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4099,16 +5772,28 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>,
 
 
 define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st4:
-;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v8i8_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st4:
-;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4118,16 +5803,29 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x
 
 
 define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st4:
-;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v8i16_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 32
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st4:
-;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4137,16 +5835,29 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>,
 
 
 define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st4:
-;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v4i16_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st4:
-;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4156,16 +5867,29 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,
 
 
 define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st4:
-;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4i32_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 16
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st4:
-;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4175,16 +5899,29 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,
 
 
 define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st4:
-;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2i32_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st4:
-;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4194,16 +5931,29 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>,
 
 
 define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st4:
-;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2i64_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 8
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st4:
-;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4213,16 +5963,29 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,
 
 
 define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4232,16 +5995,29 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,
 
 
 define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st4:
-;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4f32_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i32 16
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st4:
-;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4251,16 +6027,29 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x f
 
 
 define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st4:
-;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2f32_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st4:
-;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4270,16 +6059,29 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x f
 
 
 define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st4:
-;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2f64_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 8
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st4:
-;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4289,16 +6091,29 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x
 
 
 define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4308,16 +6123,24 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x
 
 
 define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st1x2:
-;CHECK: st1.16b { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v16i8_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.16b { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st1x2:
-;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.16b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4327,16 +6150,24 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
 
 
 define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st1x2:
-;CHECK: st1.8b { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v8i8_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.8b { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 16
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st1x2:
-;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.8b { v0, v1 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4346,16 +6177,25 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
 
 
 define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st1x2:
-;CHECK: st1.8h { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v8i16_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.8h { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st1x2:
-;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.8h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4365,16 +6205,25 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
 
 
 define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st1x2:
-;CHECK: st1.4h { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v4i16_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.4h { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 8
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st1x2:
-;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.4h { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4384,16 +6233,25 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
 
 
 define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st1x2:
-;CHECK: st1.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4i32_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st1x2:
-;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4403,16 +6261,25 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
 
 
 define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st1x2:
-;CHECK: st1.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st1x2:
-;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4422,16 +6289,25 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
 
 
 define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st1x2:
-;CHECK: st1.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st1x2:
-;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4441,16 +6317,25 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
 
 
 define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st1x2:
-;CHECK: st1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 2
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st1x2:
-;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4460,16 +6345,25 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
 
 
 define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st1x2:
-;CHECK: st1.4s { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v4f32_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.4s { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st1x2:
-;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.4s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4479,16 +6373,25 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, floa
 
 
 define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st1x2:
-;CHECK: st1.2s { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.2s { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st1x2:
-;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.2s { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4498,16 +6401,25 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, floa
 
 
 define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st1x2:
-;CHECK: st1.2d { v0, v1 }, [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.2d { v0, v1 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st1x2:
-;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st1.2d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4517,16 +6429,25 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, do
 
 
 define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st1x2:
-;CHECK: st1.1d { v0, v1 }, [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 2
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st1x2:
-;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st1x2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-NEXT:    st1.1d { v0, v1 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4536,16 +6457,26 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, do
 
 
 define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st1x3:
-;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v16i8_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.16b { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 48
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st1x3:
-;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.16b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4555,16 +6486,26 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>
 
 
 define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st1x3:
-;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v8i8_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.8b { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 24
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st1x3:
-;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.8b { v0, v1, v2 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4574,16 +6515,27 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8
 
 
 define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st1x3:
-;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v8i16_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.8h { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 24
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st1x3:
-;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.8h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4593,16 +6545,27 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16
 
 
 define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st1x3:
-;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v4i16_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.4h { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 12
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st1x3:
-;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.4h { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4612,16 +6575,27 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16
 
 
 define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st1x3:
-;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4i32_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 12
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st1x3:
-;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4631,16 +6605,27 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32
 
 
 define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st1x3:
-;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2i32_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 6
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st1x3:
-;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4650,16 +6635,27 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32
 
 
 define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st1x3:
-;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2i64_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 6
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st1x3:
-;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4669,16 +6665,27 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64
 
 
 define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st1x3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 3
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st1x3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4688,16 +6695,27 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64
 
 
 define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st1x3:
-;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v4f32_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i32 12
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st1x3:
-;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4707,16 +6725,27 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x
 
 
 define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st1x3:
-;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v2f32_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i32 6
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st1x3:
-;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4726,16 +6755,27 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x
 
 
 define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st1x3:
-;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-LABEL: test_v2f64_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 6
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st1x3:
-;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4745,16 +6785,27 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2
 
 
 define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st1x3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 3
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st1x3:
-;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st1x3:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4764,16 +6815,28 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1
 
 
 define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st1x4:
-;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v16i8_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 64
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st1x4:
-;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4783,16 +6846,28 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>
 
 
 define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st1x4:
-;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v8i8_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 32
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st1x4:
-;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -4802,16 +6877,29 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8
 
 
 define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st1x4:
-;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v8i16_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 32
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st1x4:
-;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4821,16 +6909,29 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16
 
 
 define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st1x4:
-;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v4i16_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 16
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st1x4:
-;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -4840,16 +6941,29 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16
 
 
 define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st1x4:
-;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4i32_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 16
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st1x4:
-;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4859,16 +6973,29 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32
 
 
 define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st1x4:
-;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2i32_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 8
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st1x4:
-;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -4878,16 +7005,29 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32
 
 
 define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st1x4:
-;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2i64_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 8
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st1x4:
-;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4897,16 +7037,29 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64
 
 
 define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st1x4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st1x4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -4916,16 +7069,29 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64
 
 
 define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st1x4:
-;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v4f32_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i32 16
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st1x4:
-;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4935,16 +7101,29 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x
 
 
 define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st1x4:
-;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v2f32_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i32 8
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st1x4:
-;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -4954,16 +7133,29 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x
 
 
 define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st1x4:
-;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-LABEL: test_v2f64_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 8
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st1x4:
-;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4973,16 +7165,29 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2
 
 
 define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st1x4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st1x4:
-;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st1x4:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -4990,33 +7195,25 @@ define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double
 
 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
 
-
-define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
-  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
-  %tmp = getelementptr i8, i8* %A, i32 2
-  ret i8* %tmp
-}
-
-define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
-  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
-  %tmp = getelementptr i8, i8* %A, i64 %inc
-  ret i8* %tmp
-}
-
-declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone
-
-
 define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st2lane:
-;CHECK: st2.b { v0, v1 }[0], [x0], #2
+; CHECK-LABEL: test_v16i8_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.b { v0, v1 }[0], [x0], #2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st2lane:
-;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.b { v0, v1 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5026,16 +7223,24 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8
 
 
 define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st2lane:
-;CHECK: st2.b { v0, v1 }[0], [x0], #2
+; CHECK-LABEL: test_v8i8_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.b { v0, v1 }[0], [x0], #2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 2
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st2lane:
-;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.b { v0, v1 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5045,16 +7250,25 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)
 
 
 define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st2lane:
-;CHECK: st2.h { v0, v1 }[0], [x0], #4
+; CHECK-LABEL: test_v8i16_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.h { v0, v1 }[0], [x0], #4
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st2lane:
-;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.h { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5064,16 +7278,25 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i
 
 
 define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st2lane:
-;CHECK: st2.h { v0, v1 }[0], [x0], #4
+; CHECK-LABEL: test_v4i16_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.h { v0, v1 }[0], [x0], #4
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 2
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st2lane:
-;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.h { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5083,16 +7306,25 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i
 
 
 define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v4i32_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5102,16 +7334,25 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i
 
 
 define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v2i32_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 2
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5121,16 +7362,25 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i
 
 
 define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v2i64_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 2
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5140,16 +7390,25 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i
 
 
 define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v1i64_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 2
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5159,16 +7418,25 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i
 
 
 define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v4f32_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5178,16 +7446,25 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i6
 
 
 define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], #8
+; CHECK-LABEL: test_v2f32_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 2
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st2lane:
-;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.s { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5197,16 +7474,25 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i6
 
 
 define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v2f64_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 2
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5216,16 +7502,25 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>,
 
 
 define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], #16
+; CHECK-LABEL: test_v1f64_post_imm_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 2
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st2lane:
-;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st2lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    st2.d { v0, v1 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5235,16 +7530,26 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>,
 
 
 define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st3lane:
-;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-LABEL: test_v16i8_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st3lane:
-;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5254,16 +7559,26 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i
 
 
 define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st3lane:
-;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-LABEL: test_v8i8_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 3
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st3lane:
-;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5273,16 +7588,27 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>,
 
 
 define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st3lane:
-;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-LABEL: test_v8i16_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st3lane:
-;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5292,16 +7618,27 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i
 
 
 define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st3lane:
-;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-LABEL: test_v4i16_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 3
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st3lane:
-;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5311,16 +7648,27 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i
 
 
 define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v4i32_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5330,16 +7678,27 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i
 
 
 define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v2i32_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 3
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5349,16 +7708,27 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i
 
 
 define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v2i64_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 3
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5368,16 +7738,27 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i
 
 
 define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v1i64_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 3
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5387,16 +7768,27 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i
 
 
 define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v4f32_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5406,16 +7798,27 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4
 
 
 define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-LABEL: test_v2f32_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 3
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st3lane:
-;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5425,16 +7828,27 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2
 
 
 define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v2f64_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 3
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5444,16 +7858,27 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>,
 
 
 define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-LABEL: test_v1f64_post_imm_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 3
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st3lane:
-;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st3lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5463,16 +7888,28 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>,
 
 
 define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
-;CHECK-LABEL: test_v16i8_post_imm_st4lane:
-;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-LABEL: test_v16i8_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   ret i8* %tmp
 }
 
 define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v16i8_post_reg_st4lane:
-;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v16i8_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5482,16 +7919,28 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i
 
 
 define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
-;CHECK-LABEL: test_v8i8_post_imm_st4lane:
-;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-LABEL: test_v8i8_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i32 4
   ret i8* %tmp
 }
 
 define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i8_post_reg_st4lane:
-;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i8_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   %tmp = getelementptr i8, i8* %A, i64 %inc
   ret i8* %tmp
@@ -5501,16 +7950,29 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>,
 
 
 define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
-;CHECK-LABEL: test_v8i16_post_imm_st4lane:
-;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-LABEL: test_v8i16_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   ret i16* %tmp
 }
 
 define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v8i16_post_reg_st4lane:
-;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v8i16_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5520,16 +7982,29 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i
 
 
 define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
-;CHECK-LABEL: test_v4i16_post_imm_st4lane:
-;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-LABEL: test_v4i16_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i32 4
   ret i16* %tmp
 }
 
 define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i16_post_reg_st4lane:
-;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i16_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   %tmp = getelementptr i16, i16* %A, i64 %inc
   ret i16* %tmp
@@ -5539,16 +8014,29 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i
 
 
 define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
-;CHECK-LABEL: test_v4i32_post_imm_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v4i32_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   ret i32* %tmp
 }
 
 define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4i32_post_reg_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4i32_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5558,16 +8046,29 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i
 
 
 define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
-;CHECK-LABEL: test_v2i32_post_imm_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v2i32_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i32 4
   ret i32* %tmp
 }
 
 define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i32_post_reg_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i32_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   %tmp = getelementptr i32, i32* %A, i64 %inc
   ret i32* %tmp
@@ -5577,16 +8078,29 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i
 
 
 define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
-;CHECK-LABEL: test_v2i64_post_imm_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v2i64_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2i64_post_reg_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2i64_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5596,16 +8110,29 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i
 
 
 define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
-;CHECK-LABEL: test_v1i64_post_imm_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v1i64_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 4
   ret i64* %tmp
 }
 
 define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1i64_post_reg_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1i64_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   %tmp = getelementptr i64, i64* %A, i64 %inc
   ret i64* %tmp
@@ -5615,16 +8142,29 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i
 
 
 define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
-;CHECK-LABEL: test_v4f32_post_imm_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v4f32_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   ret float* %tmp
 }
 
 define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v4f32_post_reg_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v4f32_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5634,16 +8174,29 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4
 
 
 define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
-;CHECK-LABEL: test_v2f32_post_imm_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-LABEL: test_v2f32_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i32 4
   ret float* %tmp
 }
 
 define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f32_post_reg_st4lane:
-;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f32_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   %tmp = getelementptr float, float* %A, i64 %inc
   ret float* %tmp
@@ -5653,16 +8206,29 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2
 
 
 define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
-;CHECK-LABEL: test_v2f64_post_imm_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v2f64_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v2f64_post_reg_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v2f64_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5672,16 +8238,29 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>,
 
 
 define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
-;CHECK-LABEL: test_v1f64_post_imm_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-LABEL: test_v1f64_post_imm_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 4
   ret double* %tmp
 }
 
 define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
-;CHECK-LABEL: test_v1f64_post_reg_st4lane:
-;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+; CHECK-LABEL: test_v1f64_post_reg_st4lane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   %tmp = getelementptr double, double* %A, i64 %inc
   ret double* %tmp
@@ -5691,7 +8270,10 @@ declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>,
 
 define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 ; CHECK-LABEL: test_v16i8_post_imm_ld1r:
-; CHECK: ld1r.16b { v0 }, [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.16b { v0 }, [x0], #1
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
@@ -5716,7 +8298,10 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 
 define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v16i8_post_reg_ld1r:
-; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.16b { v0 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
@@ -5741,7 +8326,10 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 
 define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 ; CHECK-LABEL: test_v8i8_post_imm_ld1r:
-; CHECK: ld1r.8b { v0 }, [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.8b { v0 }, [x0], #1
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
@@ -5758,7 +8346,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 
 define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v8i8_post_reg_ld1r:
-; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.8b { v0 }, [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
@@ -5775,7 +8366,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 
 define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 ; CHECK-LABEL: test_v8i16_post_imm_ld1r:
-; CHECK: ld1r.8h { v0 }, [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.8h { v0 }, [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
@@ -5792,7 +8386,11 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 
 define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v8i16_post_reg_ld1r:
-; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1r.8h { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
@@ -5809,7 +8407,10 @@ define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 
 define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 ; CHECK-LABEL: test_v4i16_post_imm_ld1r:
-; CHECK: ld1r.4h { v0 }, [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.4h { v0 }, [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
@@ -5822,7 +8423,11 @@ define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 
 define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4i16_post_reg_ld1r:
-; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1r.4h { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
@@ -5835,7 +8440,10 @@ define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 
 define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 ; CHECK-LABEL: test_v4i32_post_imm_ld1r:
-; CHECK: ld1r.4s { v0 }, [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
@@ -5848,7 +8456,11 @@ define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 
 define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4i32_post_reg_ld1r:
-; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
@@ -5861,7 +8473,10 @@ define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 
 define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 ; CHECK-LABEL: test_v2i32_post_imm_ld1r:
-; CHECK: ld1r.2s { v0 }, [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
@@ -5872,7 +8487,11 @@ define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 
 define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2i32_post_reg_ld1r:
-; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
@@ -5883,7 +8502,10 @@ define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 
 define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
 ; CHECK-LABEL: test_v2i64_post_imm_ld1r:
-; CHECK: ld1r.2d { v0 }, [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
@@ -5894,7 +8516,11 @@ define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
 
 define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2i64_post_reg_ld1r:
-; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
@@ -5905,7 +8531,10 @@ define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
 
 define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
 ; CHECK-LABEL: test_v4f32_post_imm_ld1r:
-; CHECK: ld1r.4s { v0 }, [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
@@ -5918,7 +8547,11 @@ define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
 
 define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1r:
-; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
@@ -5931,7 +8564,10 @@ define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc
 
 define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
 ; CHECK-LABEL: test_v2f32_post_imm_ld1r:
-; CHECK: ld1r.2s { v0 }, [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
@@ -5942,7 +8578,11 @@ define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
 
 define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2f32_post_reg_ld1r:
-; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
@@ -5953,7 +8593,10 @@ define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc
 
 define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
 ; CHECK-LABEL: test_v2f64_post_imm_ld1r:
-; CHECK: ld1r.2d { v0 }, [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
@@ -5964,7 +8607,11 @@ define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
 
 define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2f64_post_reg_ld1r:
-; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
@@ -5975,7 +8622,10 @@ define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %
 
 define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
 ; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
-; CHECK: ld1.b { v0 }[1], [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.b { v0 }[1], [x0], #1
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 1
@@ -5985,7 +8635,10 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A)
 
 define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
 ; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
-; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.b { v0 }[1], [x0], x2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 %inc
@@ -5995,7 +8648,12 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16
 
 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
 ; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
-; CHECK: ld1.b { v0 }[1], [x0], #1
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.b { v0 }[1], [x0], #1
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 1
@@ -6005,7 +8663,12 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
 
 define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
 ; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
-; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.b { v0 }[1], [x0], x2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 %inc
@@ -6015,7 +8678,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x
 
 define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
 ; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
-; CHECK: ld1.h { v0 }[1], [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.h { v0 }[1], [x0], #2
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 1
@@ -6025,7 +8691,11 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %
 
 define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
 ; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
-; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.h { v0 }[1], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 %inc
@@ -6035,7 +8705,12 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <
 
 define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
 ; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
-; CHECK: ld1.h { v0 }[1], [x0], #2
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.h { v0 }[1], [x0], #2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 1
@@ -6045,7 +8720,13 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %
 
 define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
-; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.h { v0 }[1], [x0], x8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 %inc
@@ -6055,7 +8736,10 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <
 
 define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
 ; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 1
@@ -6065,7 +8749,11 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %
 
 define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
 ; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 %inc
@@ -6075,7 +8763,12 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <
 
 define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
 ; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 1
@@ -6085,7 +8778,13 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %
 
 define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
 ; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 %inc
@@ -6095,7 +8794,10 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <
 
 define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
 ; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
-; CHECK: ld1.d { v0 }[1], [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[1], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
   %tmp3 = getelementptr i64, i64* %bar, i64 1
@@ -6105,7 +8807,11 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %
 
 define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
 ; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
-; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.d { v0 }[1], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
   %tmp3 = getelementptr i64, i64* %bar, i64 %inc
@@ -6115,7 +8821,10 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <
 
 define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
 ; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 1
@@ -6125,7 +8834,11 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x
 
 define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 %inc
@@ -6135,7 +8848,12 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %
 
 define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
 ; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], #4
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], #4
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 1
@@ -6145,7 +8863,13 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x
 
 define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
 ; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
-; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    lsl x8, x2, #2
+; CHECK-NEXT:    ld1.s { v0 }[1], [x0], x8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 %inc
@@ -6155,7 +8879,10 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %
 
 define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
 ; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
-; CHECK: ld1.d { v0 }[1], [x0], #8
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[1], [x0], #8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
   %tmp3 = getelementptr double, double* %bar, i64 1
@@ -6165,7 +8892,11 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2
 
 define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
 ; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
-; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    lsl x8, x2, #3
+; CHECK-NEXT:    ld1.d { v0 }[1], [x0], x8
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ret
   %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
   %tmp3 = getelementptr double, double* %bar, i64 %inc
@@ -6176,14 +8907,14 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i6
 ; Check for dependencies between the vector and the scalar load.
 define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
-; CHECK: %bb.0:
-; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
-; CHECK-NEXT: str q0, [x3]
-; CHECK-NEXT: ldr q0, [x4]
-; CHECK-NEXT: mov.s v0[1], v[[LD]][0]
-; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
-; CHECK-NEXT: str [[POST]], [x1]
-; CHECK-NEXT: ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr s1, [x0]
+; CHECK-NEXT:    str q0, [x3]
+; CHECK-NEXT:    ldr q0, [x4]
+; CHECK-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    mov.s v0[1], v1[0]
+; CHECK-NEXT:    ret
   %tmp1 = load float, float* %bar
   store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16
   %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
@@ -6202,7 +8933,18 @@ define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, flo
 ; PR23265
 define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
-; CHECK: ld1.h  { v0 }[1], [x0], x{{[0-9]+}}
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    lsl x8, x2, #1
+; CHECK-NEXT:    ld1.h { v0 }[1], [x0], x8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    str x0, [x1]
+; CHECK-NEXT:    ldr d1, [x3]
+; CHECK-NEXT:    cnt.8b v1, v1
+; CHECK-NEXT:    uaddlp.4h v1, v1
+; CHECK-NEXT:    uaddlp.2s v1, v1
+; CHECK-NEXT:    str d1, [x3]
+; CHECK-NEXT:    ret
   %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 %inc
@@ -6215,15 +8957,16 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %pt
 
 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 
-; CHECK-LABEL: test_ld1lane_build:
-; CHECK-DAG: ldr s[[REGNUM0:[0-9]+]], [x0]
-; CHECK-DAG: ld1.s { v[[REGNUM0:[0-9]+]] }[1], [x1]
-; CHECK-DAG: ldr s[[REGNUM1:[0-9]+]], [x2]
-; CHECK-DAG: ld1.s { v[[REGNUM1:[0-9]+]] }[1], [x3]
-; CHECK: sub.2s v[[REGNUM2:[0-9]+]], v[[REGNUM0]], v[[REGNUM1]]
-; CHECK-NEXT: str d[[REGNUM2]], [x4]
-; CHECK-NEXT: ret
 define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
+; CHECK-LABEL: test_ld1lane_build:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ldr s1, [x2]
+; CHECK-NEXT:    ld1.s { v0 }[1], [x1]
+; CHECK-NEXT:    ld1.s { v1 }[1], [x3]
+; CHECK-NEXT:    sub.2s v0, v0, v1
+; CHECK-NEXT:    str d0, [x4]
+; CHECK-NEXT:    ret
   %load0 = load i32, i32* %ptr0, align 4
   %load1 = load i32, i32* %ptr1, align 4
   %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
@@ -6239,15 +8982,16 @@ define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3,
   ret void
 }
 
-; CHECK-LABEL: test_ld1lane_build_i16:
-; CHECK-DAG:  ldr h[[REGNUM1:[0-9]+]], [x0]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[1], [x1]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[2], [x2]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[3], [x3]
-; CHECK:      sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
-; CHECK-NEXT: str d[[REGNUM2]], [x4]
-; CHECK-NEXT: ret
 define void  @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) {
+; CHECK-LABEL: test_ld1lane_build_i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr h1, [x0]
+; CHECK-NEXT:    ld1.h { v1 }[1], [x1]
+; CHECK-NEXT:    ld1.h { v1 }[2], [x2]
+; CHECK-NEXT:    ld1.h { v1 }[3], [x3]
+; CHECK-NEXT:    sub.4h v0, v1, v0
+; CHECK-NEXT:    str d0, [x4]
+; CHECK-NEXT:    ret
   %ld.a = load i16, i16* %a
   %ld.b = load i16, i16* %b
   %ld.c = load i16, i16* %c
@@ -6261,18 +9005,19 @@ define void  @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i1
   ret void
 }
 
-; CHECK-LABEL: test_ld1lane_build_half:
-; CHECK-DAG:  ldr h[[REGNUM1:[0-9]+]], [x0]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[1], [x1]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[2], [x2]
-; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[3], [x3]
-; CHECK-DAG:  fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h
-; CHECK-DAG:  fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h
-; CHECK:      fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]]
-; CHECK-DAG:  fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s
-; CHECK-NEXT: str d[[REGNUM2]], [x4]
-; CHECK-NEXT: ret
 define void  @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) {
+; CHECK-LABEL: test_ld1lane_build_half:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr h1, [x0]
+; CHECK-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-NEXT:    ld1.h { v1 }[1], [x1]
+; CHECK-NEXT:    ld1.h { v1 }[2], [x2]
+; CHECK-NEXT:    ld1.h { v1 }[3], [x3]
+; CHECK-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-NEXT:    fsub.4s v0, v1, v0
+; CHECK-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-NEXT:    str d0, [x4]
+; CHECK-NEXT:    ret
   %ld.a = load half, half* %a
   %ld.b = load half, half* %b
   %ld.c = load half, half* %c
@@ -6286,19 +9031,21 @@ define void  @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4
   ret void
 }
 
-; CHECK-LABEL: test_ld1lane_build_i8:
-; CHECK-DAG:  ldr b[[REGNUM1:[0-9]+]], [x0]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[1], [x1]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[2], [x2]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[3], [x3]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[4], [x4]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[5], [x5]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[6], [x6]
-; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[7], [x7]
-; CHECK:      sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
-; CHECK-NEXT: str d[[REGNUM2]], [x
-; CHECK-NEXT: ret
 define void  @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: test_ld1lane_build_i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr b1, [x0]
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ld1.b { v1 }[1], [x1]
+; CHECK-NEXT:    ld1.b { v1 }[2], [x2]
+; CHECK-NEXT:    ld1.b { v1 }[3], [x3]
+; CHECK-NEXT:    ld1.b { v1 }[4], [x4]
+; CHECK-NEXT:    ld1.b { v1 }[5], [x5]
+; CHECK-NEXT:    ld1.b { v1 }[6], [x6]
+; CHECK-NEXT:    ld1.b { v1 }[7], [x7]
+; CHECK-NEXT:    sub.8b v0, v1, v0
+; CHECK-NEXT:    str d0, [x8]
+; CHECK-NEXT:    ret
   %ld.a = load i8, i8* %a
   %ld.b = load i8, i8* %b
   %ld.c = load i8, i8* %c
@@ -6322,8 +9069,13 @@ define void  @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8*
 
 define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
 ; CHECK-LABEL: test_inc_cycle:
-; CHECK: ld1.s { v0 }[0], [x0]{{$}}
-
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    adrp x8, _var@PAGE
+; CHECK-NEXT:    fmov x9, d0
+; CHECK-NEXT:    add x9, x0, x9, lsl #2
+; CHECK-NEXT:    str x9, [x8, _var@PAGEOFF]
+; CHECK-NEXT:    ret
   %elt = load i32, i32* %in
   %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
 
@@ -6340,95 +9092,106 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
 @var = global i32* null
 
 define i8 @load_single_extract_variable_index_i8(<16 x i8>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_i8
-; CHECK:      ldr [[VEC:.*]], [x0]
-; CHECK-NEXT: mov [[SP_ADDR:.*]], sp
-; CHECK-NEXT: str [[VEC]], [sp]
-; CHECK-NEXT: bfxil [[SP_ADDR]], x1, #0, #4
-; CHECK-NEXT: ldrb w0, [{{ *}}[[SP_ADDR]]{{ *}}]
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfxil x8, x1, #0, #4
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    ldrb w0, [x8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
   %lv = load <16 x i8>, <16 x i8>* %A
   %e = extractelement <16 x i8> %lv, i32 %idx
   ret i8 %e
 }
 
 define i16 @load_single_extract_variable_index_i16(<8 x i16>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_i16
-; CHECK:      ldr [[VEC:.*]], [x0]
-; CHECK-NEXT: and [[IDX:.*]], x1, #0x7
-; CHECK-NEXT: mov [[SP_ADDR:.*]], sp
-; CHECK-NEXT: str [[VEC]], [sp]
-; CHECK-NEXT: bfi [[SP_ADDR]], [[IDX]], #1, #3
-; CHECK-NEXT: ldrh w0, [{{ *}}[[SP_ADDR]]{{ *}}]
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x8, x1, #0x7
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    bfi x9, x8, #1, #3
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    ldrh w0, [x9]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
   %lv = load <8 x i16>, <8 x i16>* %A
   %e = extractelement <8 x i16> %lv, i32 %idx
   ret i16 %e
 }
 
 define i32 @load_single_extract_variable_index_i32(<4 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_i32
-; CHECK:       and [[IDX:.*]], x1, #0x3
-; CHECK-NEXT:  ldr w0, [x0, [[IDX]], lsl #2]
-; CHECK-NEXT:  ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x8, x1, #0x3
+; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
+; CHECK-NEXT:    ret
   %lv = load <4 x i32>, <4 x i32>* %A
   %e = extractelement <4 x i32> %lv, i32 %idx
   ret i32 %e
 }
 
 define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align
-; CHECK:       ldr d0, [x0]
-; CHECK-NEXT:  add x[[PTR_ADD:.*]], x0, #8
-; CHECK-NEXT:  ld1.s   { v0 }[2], [x[[PTR_ADD]]]
-; CHECK-NEXT:  and [[IDX_1:.*]], x1, #0x3
-; CHECK-NEXT:  mov x[[IDX_2:.*]], sp
-; CHECK-NEXT:  str q0, [sp]
-; CHECK-NEXT:  bfi x[[IDX_2]], [[IDX_1]], #2, #2
-; CHECK-NEXT:  ldr w0, [x[[IDX_2]]]
-; CHECK-NEXT:  add sp, sp, #16
-; CHECK-NEXT:  ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    add x8, x0, #8
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ld1.s { v0 }[2], [x8]
+; CHECK-NEXT:    and x8, x1, #0x3
+; CHECK-NEXT:    bfi x9, x8, #2, #2
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    ldr w0, [x9]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
   %lv = load <3 x i32>, <3 x i32>* %A, align 2
   %e = extractelement <3 x i32> %lv, i32 %idx
   ret i32 %e
 }
 
 define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align
-; CHECK:       sxtw  [[IDX:.*]], w1
-; CHECK-NEXT:  cmp [[IDX]], #2
-; CHECK-NEXT:  mov w[[TMP:.*]], #2
-; CHECK-NEXT:  csel    [[IDX]], [[IDX]], x[[TMP]], lo
-; CHECK-NEXT:  ldr w0, [x0, [[IDX]], lsl #2]
-; CHECK-NEXT: ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    cmp x8, #2
+; CHECK-NEXT:    mov w9, #2
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
+; CHECK-NEXT:    ret
   %lv = load <3 x i32>, <3 x i32>* %A
   %e = extractelement <3 x i32> %lv, i32 %idx
   ret i32 %e
 }
 
 define i32 @load_single_extract_valid_const_index_v3i32(<3 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_valid_const_index_v3i32
-; CHECK:      ldr w0, [x0, #8]
-; CHECK-NEXT: ret
-;
+; CHECK-LABEL: load_single_extract_valid_const_index_v3i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr w0, [x0, #8]
+; CHECK-NEXT:    ret
   %lv = load <3 x i32>, <3 x i32>* %A
   %e = extractelement <3 x i32> %lv, i32 2
   ret i32 %e
 }
 
 define i32 @load_single_extract_variable_index_masked_i32(<4 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_masked_i32
-; CHECK:       and [[IDX:.*]], w1, #0x3
-; CHECK-NEXT:  ldr w0, [x0, [[IDX]], uxtw #2]
-; CHECK-NEXT:  ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_masked_i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w8, w1, #0x3
+; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
+; CHECK-NEXT:    ret
   %idx.x = and i32 %idx, 3
   %lv = load <4 x i32>, <4 x i32>* %A
   %e = extractelement <4 x i32> %lv, i32 %idx.x
@@ -6436,11 +9199,11 @@ define i32 @load_single_extract_variable_index_masked_i32(<4 x i32>* %A, i32 %id
 }
 
 define i32 @load_single_extract_variable_index_masked2_i32(<4 x i32>* %A, i32 %idx) {
-; CHECK-LABEL: load_single_extract_variable_index_masked2_i32
-; CHECK:       and [[IDX:.*]], w1, #0x1
-; CHECK-NEXT:  ldr w0, [x0, [[IDX]], uxtw #2]
-; CHECK-NEXT:  ret
-;
+; CHECK-LABEL: load_single_extract_variable_index_masked2_i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    and w8, w1, #0x1
+; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
+; CHECK-NEXT:    ret
   %idx.x = and i32 %idx, 1
   %lv = load <4 x i32>, <4 x i32>* %A
   %e = extractelement <4 x i32> %lv, i32 %idx.x

diff  --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
index cb3e095aceb21..f9286f57daea1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,27 +1,40 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s
 
 ; rdar://9167275
 
 define i32 @t1() nounwind ssp {
-entry:
 ; CHECK-LABEL: t1:
-; CHECK: mov {{w[0-9]+}}, 7
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w0, 7
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind
   ret i32 %0
 }
 
 define i64 @t2() nounwind ssp {
-entry:
 ; CHECK-LABEL: t2:
-; CHECK: mov {{x[0-9]+}}, 7
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x0, 7
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind
   ret i64 %0
 }
 
 define i64 @t3() nounwind ssp {
-entry:
 ; CHECK-LABEL: t3:
-; CHECK: mov {{w[0-9]+}}, 7
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w0, 7
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind
   ret i64 %0
 }
@@ -29,9 +42,14 @@ entry:
 ; rdar://9281206
 
 define void @t4(i64 %op) nounwind {
-entry:
 ; CHECK-LABEL: t4:
-; CHECK: mov x0, {{x[0-9]+}}; svc #0
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x0, x8; svc #0;
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind
   ret void
 }
@@ -39,9 +57,13 @@ entry:
 ; rdar://9394290
 
 define float @t5(float %x) nounwind {
-entry:
 ; CHECK-LABEL: t5:
-; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    fadd s0, s0, s0
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind
   ret float %0
 }
@@ -49,19 +71,32 @@ entry:
 ; rdar://9553599
 
 define zeroext i8 @t6(i8* %src) nounwind {
-entry:
 ; CHECK-LABEL: t6:
-; CHECK: ldtrb {{w[0-9]+}}, [{{x[0-9]+}}]
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldtrb w8, [x0]
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+entry:
   %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind
   ret i8 %0
 }
 
 define void @t7(i8* %f, i32 %g) nounwind {
+; CHECK-LABEL: t7:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    str x0, [sp, #8]
+; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    str w1, [x8]
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 entry:
   %f.addr = alloca i8*, align 8
   store i8* %f, i8** %f.addr, align 8
-  ; CHECK-LABEL: t7:
-  ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}]
   call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
   ret void
 }
@@ -70,144 +105,261 @@ entry:
 ; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v'
 ; registers.
 define void @t8() nounwind ssp {
-entry:
 ; CHECK-LABEL: t8:
-; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16]
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d9, d8, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldp d9, d8, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
   tail call void asm sideeffect "nop", "~{v8}"() nounwind
   ret void
 }
 
 define i32 @constraint_I(i32 %i, i32 %j) nounwind {
+; CHECK-LABEL: constraint_I:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    add w8, w0, 16773120
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    add w0, w0, 4096
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: constraint_I:
   %0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 16773120) nounwind
-  ; CHECK: add   {{w[0-9]+}}, {{w[0-9]+}}, 16773120
   %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 4096) nounwind
-  ; CHECK: add   {{w[0-9]+}}, {{w[0-9]+}}, 4096
   ret i32 %1
 }
 
 define i32 @constraint_J(i32 %i, i32 %j, i64 %k) nounwind {
+; CHECK-LABEL: constraint_J:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    sub w8, w0, -16773120
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    sub w0, w0, -1
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    sub x8, x2, -1
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    sub x8, x2, -1
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: constraint_J:
   %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind
-  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, -16773120
   %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind
-  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, -1
   %2 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i32 -1) nounwind
-  ; CHECK: sub   {{x[0-9]+}}, {{x[0-9]+}}, -1
   %3 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i64 -1) nounwind
-  ; CHECK: sub   {{x[0-9]+}}, {{x[0-9]+}}, -1
   ret i32 %1
 }
 
 define i32 @constraint_KL(i32 %i, i32 %j) nounwind {
+; CHECK-LABEL: constraint_KL:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    eor w8, w0, 255
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    eor w0, w0, 16711680
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: constraint_KL:
   %0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,K"(i32 %i, i32 255) nounwind
-  ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, 255
   %1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,L"(i32 %i, i64 16711680) nounwind
-  ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, 16711680
   ret i32 %1
 }
 
 define i32 @constraint_MN(i32 %i, i32 %j) nounwind {
+; CHECK-LABEL: constraint_MN:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    movk w8, 65535
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    movz w0, 0
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: constraint_MN:
   %0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32 65535) nounwind
-  ; CHECK: movk  {{w[0-9]+}}, 65535
   %1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0) nounwind
-  ; CHECK: movz  {{w[0-9]+}}, 0
   ret i32 %1
 }
 
 define void @t9() nounwind {
+; CHECK-LABEL: t9:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov.2d v4, v0
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t9:
   %data = alloca <2 x double>, align 16
   %0 = load <2 x double>, <2 x double>* %data, align 16
   call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
-  ; CHECK: mov.2d v4, {{v[0-9]+}}
   ret void
 }
 
 define void @t10() nounwind {
+; CHECK-LABEL: t10:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ldr d0, [sp, #8]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr z0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr q0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr d0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr h0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr b0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t10:
   %data = alloca <2 x float>, align 8
   %a = alloca [2 x float], align 4
   %arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0
   %0 = load <2 x float>, <2 x float>* %data, align 8
   call void asm sideeffect "ldr ${1:z}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{z[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{h[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
-  ; CHECK: ldr {{b[0-9]+}}, [{{x[0-9]+}}]
   ret void
 }
 
 define void @t11() nounwind {
+; CHECK-LABEL: t11:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ldr w8, [sp, #12]
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov xzr, x8
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr w8, [sp, #12]
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov wzr, w8
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t11:
   %a = alloca i32, align 4
   %0 = load i32, i32* %a, align 4
   call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
-  ; CHECK: mov xzr, {{x[0-9]+}}
   %1 = load i32, i32* %a, align 4
   call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
-  ; CHECK: mov wzr, {{w[0-9]+}}
   ret void
 }
 
 define void @t12() nounwind {
+; CHECK-LABEL: t12:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov.2d v4, v0
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t12:
   %data = alloca <4 x float>, align 16
   %0 = load <4 x float>, <4 x float>* %data, align 16
   call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
-  ; CHECK: mov.2d v4, {{v([0-9])|(1[0-5])}}
   ret void
 }
 
 define void @t13() nounwind {
+; CHECK-LABEL: t13:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x4, 1311673391471656960
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x4, -4662
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x4, 4660
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov x4, -71777214294589696
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t13:
   tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 1311673391471656960) nounwind
-  ; CHECK: mov x4, 1311673391471656960
   tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind
-  ; CHECK: mov x4, -4662
   tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind
-  ; CHECK: mov x4, 4660
   call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696) nounwind
-  ; CHECK: mov x4, -71777214294589696
   ret void
 }
 
 define void @t14() nounwind {
+; CHECK-LABEL: t14:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w4, 305397760
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w4, 4294962634
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w4, 4660
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov w4, 4278255360
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-  ; CHECK-LABEL: t14:
   tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760) nounwind
-  ; CHECK: mov w4, 305397760
   tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind
-  ; CHECK: mov w4, 4294962634
   tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind
-  ; CHECK: mov w4, 4660
   call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind
-  ; CHECK: mov w4, 4278255360
   ret void
 }
 
 define void @t15() nounwind {
+; CHECK-LABEL: t15:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    fmov x8, d8
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
   %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind
-  ; CHECK: fmov {{x[0-9]+}}, d8
   ret void
 }
 
@@ -215,81 +367,134 @@ entry:
 
 define void @test_zero_reg(i32* %addr) {
 ; CHECK-LABEL: test_zero_reg:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    USE(xzr)
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    USE(wzr)
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    USE(w8)
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    USE(xzr), USE(xzr)
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    USE(xzr), USE(wzr)
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 
   tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind
-; CHECK: USE(xzr)
 
   tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0)
-; CHECK: USE(wzr)
 
   tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1)
-; CHECK: mov [[VAL1:w[0-9]+]], #1
-; CHECK: USE([[VAL1]])
 
   tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0) nounwind
-; CHECK: USE(xzr), USE(xzr)
 
   tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0, i32 0) nounwind
-; CHECK: USE(xzr), USE(wzr)
 
   ret void
 }
 
 define <2 x float> @test_vreg_64bit(<2 x float> %in) nounwind {
-  ; CHECK-LABEL: test_vreg_64bit:
+; CHECK-LABEL: test_vreg_64bit:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    fadd v14.2s, v0.2s, v0.2s
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    fmov d0, d14
+; CHECK-NEXT:    ldp d15, d14, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
   %1 = tail call <2 x float> asm sideeffect "fadd ${0}.2s, ${1}.2s, ${1}.2s", "={v14},w"(<2 x float> %in) nounwind
-  ; CHECK: fadd v14.2s, v0.2s, v0.2s
   ret <2 x float> %1
 }
 
 define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind {
-  ; CHECK-LABEL: test_vreg_128bit:
+; CHECK-LABEL: test_vreg_128bit:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    fadd v14.4s, v0.4s, v0.4s
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    mov.16b v0, v14
+; CHECK-NEXT:    ldp d15, d14, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
   %1 = tail call <4 x float> asm sideeffect "fadd ${0}.4s, ${1}.4s, ${1}.4s", "={v14},w"(<4 x float> %in) nounwind
-  ; CHECK: fadd v14.4s, v0.4s, v0.4s
   ret <4 x float> %1
 }
 
 define void @test_constraint_w(i32 %a) {
-  ; CHECK: fmov [[SREG:s[0-9]+]], {{w[0-9]+}}
-  ; CHECK: sqxtn h0, [[SREG]]
+; CHECK-LABEL: test_constraint_w:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    sqxtn h0, s0
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 
   tail call void asm sideeffect "sqxtn h0, ${0:s}\0A", "w"(i32 %a)
   ret void
 }
 
 define void @test_inline_modifier_a(i8* %ptr) nounwind {
-  ; CHECK-LABEL: test_inline_modifier_a:
+; CHECK-LABEL: test_inline_modifier_a:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    prfm pldl1keep, [x0]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
   tail call void asm sideeffect "prfm pldl1keep, ${0:a}\0A", "r"(i8* %ptr)
-  ; CHECK: prfm pldl1keep, [x0]
   ret void
 }
 
 ; PR33134
 define void @test_zero_address() {
+; CHECK-LABEL: test_zero_address:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: test_zero_address
-; CHECK: mov {{x[0-9]+}}, xzr
-; CHECK: ldr {{x[0-9]+}}, {{[x[0-9]+]}}
   tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* null)
   ret void
 }
 
 ; No '#' in lane specifier
 define void @test_no_hash_in_lane_specifier() {
-; CHECK-LABEL: test_no_hash_in_lane_specifier
-; CHECK: fmla v2.4s, v0.4s, v1.s[1]
-; CHECK: ret
+; CHECK-LABEL: test_no_hash_in_lane_specifier:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    fmla v2.4s, v0.4s, v1.s[1]
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
   tail call void asm sideeffect "fmla v2.4s, v0.4s, v1.s[$0]", "I"(i32 1) #1
   ret void
 }
+
 define void @test_vector_too_large_r_m(<9 x float>* nocapture readonly %0) {
-; CHECK-LABEL: test_vector_too_large_r_m
-; CHECK:      ldr [[S:s[0-9]+]], [x0, #32]
-; CHECK-DAG:  ldp [[Q0:q[0-9]+]], [[Q1:q[0-9]+]], [x0]
-; CHECK:      str [[S]], [sp, #32]
+; CHECK-LABEL: test_vector_too_large_r_m:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    ldr s0, [x0, #32]
+; CHECK-NEXT:    ldp q2, q1, [x0]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    str s0, [sp, #32]
+; CHECK-NEXT:    stp q2, q1, [sp]
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
 ; CHECK-DAG   stp [[Q0]], [[Q1]], [sp]
-; CHECK:     ; InlineAsm Start
-;
 entry:
   %m.addr = alloca <9 x float>, align 16
   %m = load <9 x float>, <9 x float>* %0, align 16
@@ -300,9 +505,15 @@ entry:
 
 define void @test_o_output_constraint() {
 ; CHECK-LABEL: test_o_output_constraint:
-; CHECK: sub sp, sp, #16
-; CHECK: add x[[REG:[0-9]+]], sp, #15
-; CHECK: mov [x[[REG]]], 7
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    add x8, sp, #15
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    mov [x8], 7
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
   %b = alloca i8, align 1
   call void asm "mov $0, 7", "=*o"(i8* %b)
   ret void

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ldp.ll b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
index 388f18bf801a2..6abde15ac763d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
@@ -1,8 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
 
-; CHECK-LABEL: ldp_int
-; CHECK: ldp
 define i32 @ldp_int(i32* %p) nounwind {
+; CHECK-LABEL: ldp_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w8, w9, [x0]
+; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    ret
   %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
   %tmp1 = load i32, i32* %add.ptr, align 4
@@ -10,9 +14,12 @@ define i32 @ldp_int(i32* %p) nounwind {
   ret i32 %add
 }
 
-; CHECK-LABEL: ldp_sext_int
-; CHECK: ldpsw
 define i64 @ldp_sext_int(i32* %p) nounwind {
+; CHECK-LABEL: ldp_sext_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldpsw x8, x9, [x0]
+; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    ret
   %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
   %tmp1 = load i32, i32* %add.ptr, align 4
@@ -22,10 +29,14 @@ define i64 @ldp_sext_int(i32* %p) nounwind {
   ret i64 %add
 }
 
-; CHECK-LABEL: ldp_half_sext_res0_int:
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
-; CHECK: sxtw     x[[DST1]], w[[DST1]]
 define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
+; CHECK-LABEL: ldp_half_sext_res0_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w8, w9, [x0]
+; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
+; CHECK-NEXT:    sxtw x8, w8
+; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    ret
   %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
   %tmp1 = load i32, i32* %add.ptr, align 4
@@ -35,10 +46,14 @@ define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
   ret i64 %add
 }
 
-; CHECK-LABEL: ldp_half_sext_res1_int:
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
-; CHECK: sxtw     x[[DST2]], w[[DST2]]
 define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
+; CHECK-LABEL: ldp_half_sext_res1_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w8, w9, [x0]
+; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
+; CHECK-NEXT:    sxtw x9, w9
+; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    ret
   %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
   %tmp1 = load i32, i32* %add.ptr, align 4
@@ -49,9 +64,12 @@ define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
 }
 
 
-; CHECK-LABEL: ldp_long
-; CHECK: ldp
 define i64 @ldp_long(i64* %p) nounwind {
+; CHECK-LABEL: ldp_long:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x8, x9, [x0]
+; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    ret
   %tmp = load i64, i64* %p, align 8
   %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
   %tmp1 = load i64, i64* %add.ptr, align 8
@@ -59,9 +77,12 @@ define i64 @ldp_long(i64* %p) nounwind {
   ret i64 %add
 }
 
-; CHECK-LABEL: ldp_float
-; CHECK: ldp
 define float @ldp_float(float* %p) nounwind {
+; CHECK-LABEL: ldp_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp s0, s1, [x0]
+; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    ret
   %tmp = load float, float* %p, align 4
   %add.ptr = getelementptr inbounds float, float* %p, i64 1
   %tmp1 = load float, float* %add.ptr, align 4
@@ -69,9 +90,12 @@ define float @ldp_float(float* %p) nounwind {
   ret float %add
 }
 
-; CHECK-LABEL: ldp_double
-; CHECK: ldp
 define double @ldp_double(double* %p) nounwind {
+; CHECK-LABEL: ldp_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp d0, d1, [x0]
+; CHECK-NEXT:    fadd d0, d0, d1
+; CHECK-NEXT:    ret
   %tmp = load double, double* %p, align 8
   %add.ptr = getelementptr inbounds double, double* %p, i64 1
   %tmp1 = load double, double* %add.ptr, align 8
@@ -79,9 +103,12 @@ define double @ldp_double(double* %p) nounwind {
   ret double %add
 }
 
-; CHECK-LABEL: ldp_doublex2
-; CHECK: ldp
 define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
+; CHECK-LABEL: ldp_doublex2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
   %tmp = load <2 x double>, <2 x double>* %p, align 16
   %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
   %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
@@ -91,10 +118,11 @@ define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
 
 ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
 define i32 @ldur_int(i32* %a) nounwind {
-; CHECK-LABEL: ldur_int
-; CHECK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
-; CHECK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -104,10 +132,11 @@ define i32 @ldur_int(i32* %a) nounwind {
 }
 
 define i64 @ldur_sext_int(i32* %a) nounwind {
-; CHECK-LABEL: ldur_sext_int
-; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
-; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_sext_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldpsw x9, x8, [x0, #-8]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -119,11 +148,13 @@ define i64 @ldur_sext_int(i32* %a) nounwind {
 }
 
 define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
-; CHECK-LABEL: ldur_half_sext_int_res0
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
-; CHECK: sxtw     x[[DST1]], w[[DST1]]
-; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_half_sext_int_res0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
+; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
+; CHECK-NEXT:    sxtw x9, w9
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -135,11 +166,13 @@ define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
 }
 
 define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
-; CHECK-LABEL: ldur_half_sext_int_res1
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
-; CHECK: sxtw     x[[DST2]], w[[DST2]]
-; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_half_sext_int_res1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
+; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
+; CHECK-NEXT:    sxtw x8, w8
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -152,10 +185,11 @@ define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
 
 
 define i64 @ldur_long(i64* %a) nounwind ssp {
-; CHECK-LABEL: ldur_long
-; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
-; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_long:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x9, x8, [x0, #-16]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -1
   %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -2
@@ -165,10 +199,11 @@ define i64 @ldur_long(i64* %a) nounwind ssp {
 }
 
 define float @ldur_float(float* %a) {
-; CHECK-LABEL: ldur_float
-; CHECK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
-; CHECK-NEXT: fadd    s{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp s1, s0, [x0, #-8]
+; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds float, float* %a, i64 -1
   %tmp1 = load float, float* %p1, align 2
   %p2 = getelementptr inbounds float, float* %a, i64 -2
@@ -178,10 +213,11 @@ define float @ldur_float(float* %a) {
 }
 
 define double @ldur_double(double* %a) {
-; CHECK-LABEL: ldur_double
-; CHECK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
-; CHECK-NEXT: fadd    d{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp d1, d0, [x0, #-16]
+; CHECK-NEXT:    fadd d0, d0, d1
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds double, double* %a, i64 -1
   %tmp1 = load double, double* %p1, align 2
   %p2 = getelementptr inbounds double, double* %a, i64 -2
@@ -191,10 +227,11 @@ define double @ldur_double(double* %a) {
 }
 
 define <2 x double> @ldur_doublex2(<2 x double>* %a) {
-; CHECK-LABEL: ldur_doublex2
-; CHECK: ldp     q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32]
-; CHECK-NEXT: fadd    v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d
-; CHECK-NEXT: ret
+; CHECK-LABEL: ldur_doublex2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q1, q0, [x0, #-32]
+; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
   %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
   %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
@@ -205,11 +242,11 @@ define <2 x double> @ldur_doublex2(<2 x double>* %a) {
 
 ; Now check some boundary conditions
 define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyIn
-; CHECK-NOT: ldur
-; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
-; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyIn:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x9, x8, [x0, #-256]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -31
   %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -32
@@ -219,11 +256,11 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
 }
 
 define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyInSext
-; CHECK-NOT: ldur
-; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
-; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInSext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldpsw x9, x8, [x0, #-256]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -235,12 +272,13 @@ define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
 }
 
 define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyInHalfSextRes0
-; CHECK-NOT: ldur
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
-; CHECK: sxtw     x[[DST1]], w[[DST1]]
-; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
+; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
+; CHECK-NEXT:    sxtw x9, w9
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -252,12 +290,13 @@ define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
 }
 
 define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyInHalfSextRes1
-; CHECK-NOT: ldur
-; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
-; CHECK: sxtw     x[[DST2]], w[[DST2]]
-; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
+; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
+; CHECK-NEXT:    sxtw x8, w8
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -269,12 +308,15 @@ define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
 }
 
 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyOut
-; CHECK-NOT: ldp
 ; Don't be fragile about which loads or manipulations of the base register
 ; are used---just check that there isn't an ldp before the add
-; CHECK: add
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyOut:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x9, x0, #264
+; CHECK-NEXT:    ldur x8, [x0, #-256]
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -32
   %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -33
@@ -284,12 +326,15 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
 }
 
 define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
-; CHECK-LABEL: pairUpBarelyOutSext
-; CHECK-NOT: ldp
 ; Don't be fragile about which loads or manipulations of the base register
 ; are used---just check that there isn't an ldp before the add
-; CHECK: add
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyOutSext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x9, x0, #260
+; CHECK-NEXT:    ldursw x8, [x0, #-256]
+; CHECK-NEXT:    ldrsw x9, [x9]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -64
   %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -65
@@ -301,12 +346,12 @@ define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
 }
 
 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
-; CHECK-LABEL: pairUpNotAligned
-; CHECK-NOT: ldp
-; CHECK: ldur
-; CHECK-NEXT: ldur
-; CHECK-NEXT: add
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpNotAligned:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur x8, [x0, #-143]
+; CHECK-NEXT:    ldur x9, [x0, #-135]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -18
   %bp1 = bitcast i64* %p1 to i8*
   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
@@ -324,12 +369,12 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
 }
 
 define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
-; CHECK-LABEL: pairUpNotAlignedSext
-; CHECK-NOT: ldp
-; CHECK: ldursw
-; CHECK-NEXT: ldursw
-; CHECK-NEXT: add
-; CHECK-NEXT: ret
+; CHECK-LABEL: pairUpNotAlignedSext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldursw x8, [x0, #-71]
+; CHECK-NEXT:    ldursw x9, [x0, #-67]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -18
   %bp1 = bitcast i32* %p1 to i8*
   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
@@ -350,9 +395,17 @@ define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
 
 declare void @use-ptr(i32*)
 
-; CHECK-LABEL: ldp_sext_int_pre
-; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
 define i64 @ldp_sext_int_pre(i32* %p) nounwind {
+; CHECK-LABEL: ldp_sext_int_pre:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    add x0, x0, #8
+; CHECK-NEXT:    bl "use-ptr"
+; CHECK-NEXT:    ldpsw x8, x9, [x19, #8]
+; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i32, i32* %p, i64 2
   call void @use-ptr(i32* %ptr)
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
@@ -365,9 +418,17 @@ define i64 @ldp_sext_int_pre(i32* %p) nounwind {
   ret i64 %add
 }
 
-; CHECK-LABEL: ldp_sext_int_post
-; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
 define i64 @ldp_sext_int_post(i32* %p) nounwind {
+; CHECK-LABEL: ldp_sext_int_post:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldpsw x19, x20, [x0], #8
+; CHECK-NEXT:    bl "use-ptr"
+; CHECK-NEXT:    add x0, x20, x19
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
   %tmp1 = load i32, i32* %add.ptr, align 4

diff  --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
index 0ebd35267f772..61837e642a884 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -1,61 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define void @bzero_4_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_4_heap:
-; CHECK:       str wzr, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str wzr, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 4 %c, i8 0, i64 4, i1 false)
   ret void
 }
 
 define void @bzero_8_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_8_heap:
-; CHECK:       str xzr, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str xzr, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 8, i1 false)
   ret void
 }
 
 define void @bzero_12_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_12_heap:
-; CHECK:       str wzr, [x0, #8]
-; CHECK-NEXT:  str xzr, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str wzr, [x0, #8]
+; CHECK-NEXT:    str xzr, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false)
   ret void
 }
 
 define void @bzero_16_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_16_heap:
-; CHECK:       stp xzr, xzr, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp xzr, xzr, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 16, i1 false)
   ret void
 }
 
 define void @bzero_32_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_32_heap:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  stp q0, q0, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    stp q0, q0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 32, i1 false)
   ret void
 }
 
 define void @bzero_64_heap(i8* nocapture %c) {
 ; CHECK-LABEL: bzero_64_heap:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  stp q0, q0, [x0, #32]
-; CHECK-NEXT:  stp q0, q0, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    stp q0, q0, [x0, #32]
+; CHECK-NEXT:    stp q0, q0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false)
   ret void
 }
 
 define void @bzero_4_stack() {
 ; CHECK-LABEL: bzero_4_stack:
-; CHECK:       str wzr, [sp, #12]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %buf = alloca [4 x i8], align 1
   %cast = bitcast [4 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i1 false)
@@ -65,8 +79,14 @@ define void @bzero_4_stack() {
 
 define void @bzero_8_stack() {
 ; CHECK-LABEL: bzero_8_stack:
-; CHECK:       stp x30, xzr, [sp, #-16]!
-; CHECK:       bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %buf = alloca [8 x i8], align 1
   %cast = bitcast [8 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 8, i1 false)
@@ -76,9 +96,18 @@ define void @bzero_8_stack() {
 
 define void @bzero_12_stack() {
 ; CHECK-LABEL: bzero_12_stack:
-; CHECK:       str wzr, [sp, #8]
-; CHECK-NEXT:  str xzr, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str wzr, [sp, #8]
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
   %buf = alloca [12 x i8], align 1
   %cast = bitcast [12 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 12, i1 false)
@@ -88,10 +117,17 @@ define void @bzero_12_stack() {
 
 define void @bzero_16_stack() {
 ; CHECK-LABEL: bzero_16_stack:
-; CHECK:       stp xzr, x30, [sp, #8]
-; CHECK:       mov x0, sp
-; CHECK:       str xzr, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    stp xzr, x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 16, i1 false)
@@ -101,9 +137,18 @@ define void @bzero_16_stack() {
 
 define void @bzero_20_stack() {
 ; CHECK-LABEL: bzero_20_stack:
-; CHECK:       stp xzr, xzr, [sp, #8]
-; CHECK-NEXT:  str wzr, [sp, #24]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    stp xzr, xzr, [sp, #8]
+; CHECK-NEXT:    str wzr, [sp, #24]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [20 x i8], align 1
   %cast = bitcast [20 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 20, i1 false)
@@ -113,10 +158,19 @@ define void @bzero_20_stack() {
 
 define void @bzero_26_stack() {
 ; CHECK-LABEL: bzero_26_stack:
-; CHECK:       stp xzr, xzr, [sp]
-; CHECK-NEXT:  strh wzr, [sp, #24]
-; CHECK-NEXT:  str xzr, [sp, #16]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp xzr, xzr, [sp]
+; CHECK-NEXT:    strh wzr, [sp, #24]
+; CHECK-NEXT:    str xzr, [sp, #16]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 26, i1 false)
@@ -126,10 +180,18 @@ define void @bzero_26_stack() {
 
 define void @bzero_32_stack() {
 ; CHECK-LABEL: bzero_32_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [32 x i8], align 1
   %cast = bitcast [32 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 32, i1 false)
@@ -139,11 +201,19 @@ define void @bzero_32_stack() {
 
 define void @bzero_40_stack() {
 ; CHECK-LABEL: bzero_40_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  str xzr, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT: bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str xzr, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
   %buf = alloca [40 x i8], align 1
   %cast = bitcast [40 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 40, i1 false)
@@ -153,11 +223,19 @@ define void @bzero_40_stack() {
 
 define void @bzero_64_stack() {
 ; CHECK-LABEL: bzero_64_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   %buf = alloca [64 x i8], align 1
   %cast = bitcast [64 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 64, i1 false)
@@ -167,12 +245,20 @@ define void @bzero_64_stack() {
 
 define void @bzero_72_stack() {
 ; CHECK-LABEL: bzero_72_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  str xzr, [sp, #64]
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str xzr, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
   %buf = alloca [72 x i8], align 1
   %cast = bitcast [72 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 72, i1 false)
@@ -182,13 +268,21 @@ define void @bzero_72_stack() {
 
 define void @bzero_128_stack() {
 ; CHECK-LABEL: bzero_128_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp, #96]
-; CHECK-NEXT:  stp q0, q0, [sp, #64]
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #144
+; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #96]
+; CHECK-NEXT:    stp q0, q0, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #144
+; CHECK-NEXT:    ret
   %buf = alloca [128 x i8], align 1
   %cast = bitcast [128 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 128, i1 false)
@@ -198,17 +292,26 @@ define void @bzero_128_stack() {
 
 define void @bzero_256_stack() {
 ; CHECK-LABEL: bzero_256_stack:
-; CHECK:       movi v0.2d, #0000000000000000
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp, #224]
-; CHECK-NEXT:  stp q0, q0, [sp, #192]
-; CHECK-NEXT:  stp q0, q0, [sp, #160]
-; CHECK-NEXT:  stp q0, q0, [sp, #128]
-; CHECK-NEXT:  stp q0, q0, [sp, #96]
-; CHECK-NEXT:  stp q0, q0, [sp, #64]
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #272
+; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #224]
+; CHECK-NEXT:    stp q0, q0, [sp, #192]
+; CHECK-NEXT:    stp q0, q0, [sp, #160]
+; CHECK-NEXT:    stp q0, q0, [sp, #128]
+; CHECK-NEXT:    stp q0, q0, [sp, #96]
+; CHECK-NEXT:    stp q0, q0, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #272
+; CHECK-NEXT:    ret
   %buf = alloca [256 x i8], align 1
   %cast = bitcast [256 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 256, i1 false)
@@ -218,10 +321,16 @@ define void @bzero_256_stack() {
 
 define void @memset_4_stack() {
 ; CHECK-LABEL: memset_4_stack:
-; CHECK:       mov w8, #-1431655766
-; CHECK-NEXT:  add x0, sp, #12
-; CHECK-NEXT:  str w8, [sp, #12]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w8, #-1431655766
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    str w8, [sp, #12]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %buf = alloca [4 x i8], align 1
   %cast = bitcast [4 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 4, i1 false)
@@ -231,10 +340,15 @@ define void @memset_4_stack() {
 
 define void @memset_8_stack() {
 ; CHECK-LABEL: memset_8_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  stp x30, x8, [sp, #-16]!
-; CHECK-NEXT:  add x0, sp, #8
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    stp x30, x8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %buf = alloca [8 x i8], align 1
   %cast = bitcast [8 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 8, i1 false)
@@ -244,11 +358,19 @@ define void @memset_8_stack() {
 
 define void @memset_12_stack() {
 ; CHECK-LABEL: memset_12_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  str x8, [sp]
-; CHECK-NEXT:  str w8, [sp, #8]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str x8, [sp]
+; CHECK-NEXT:    str w8, [sp, #8]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
   %buf = alloca [12 x i8], align 1
   %cast = bitcast [12 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 12, i1 false)
@@ -258,11 +380,18 @@ define void @memset_12_stack() {
 
 define void @memset_16_stack() {
 ; CHECK-LABEL: memset_16_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp x8, x30, [sp, #8]
-; CHECK-NEXT:  str x8, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp x8, x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    str x8, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 16, i1 false)
@@ -272,11 +401,19 @@ define void @memset_16_stack() {
 
 define void @memset_20_stack() {
 ; CHECK-LABEL: memset_20_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  add x0, sp, #8
-; CHECK-NEXT:  stp x8, x8, [sp, #8]
-; CHECK-NEXT:  str w8, [sp, #24]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    stp x8, x8, [sp, #8]
+; CHECK-NEXT:    str w8, [sp, #24]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [20 x i8], align 1
   %cast = bitcast [20 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 20, i1 false)
@@ -286,12 +423,20 @@ define void @memset_20_stack() {
 
 define void @memset_26_stack() {
 ; CHECK-LABEL: memset_26_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp x8, x8, [sp, #8]
-; CHECK-NEXT:  str x8, [sp]
-; CHECK-NEXT:  strh w8, [sp, #24]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp x8, x8, [sp, #8]
+; CHECK-NEXT:    str x8, [sp]
+; CHECK-NEXT:    strh w8, [sp, #24]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 26, i1 false)
@@ -301,10 +446,18 @@ define void @memset_26_stack() {
 
 define void @memset_32_stack() {
 ; CHECK-LABEL: memset_32_stack:
-; CHECK:       movi v0.16b, #170
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %buf = alloca [32 x i8], align 1
   %cast = bitcast [32 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 32, i1 false)
@@ -314,12 +467,20 @@ define void @memset_32_stack() {
 
 define void @memset_40_stack() {
 ; CHECK-LABEL: memset_40_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  movi v0.16b, #170
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  str x8, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT: bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str x8, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
   %buf = alloca [40 x i8], align 1
   %cast = bitcast [40 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 40, i1 false)
@@ -329,11 +490,19 @@ define void @memset_40_stack() {
 
 define void @memset_64_stack() {
 ; CHECK-LABEL: memset_64_stack:
-; CHECK:       movi v0.16b, #170
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   %buf = alloca [64 x i8], align 1
   %cast = bitcast [64 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 64, i1 false)
@@ -343,13 +512,21 @@ define void @memset_64_stack() {
 
 define void @memset_72_stack() {
 ; CHECK-LABEL: memset_72_stack:
-; CHECK:       mov x8, #-6148914691236517206
-; CHECK-NEXT:  movi v0.16b, #170
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  str x8, [sp, #64]
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str x8, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
   %buf = alloca [72 x i8], align 1
   %cast = bitcast [72 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 72, i1 false)
@@ -359,13 +536,21 @@ define void @memset_72_stack() {
 
 define void @memset_128_stack() {
 ; CHECK-LABEL: memset_128_stack:
-; CHECK:       movi v0.16b, #170
-; CHECK-NEXT:  mov x0, sp
-; CHECK-NEXT:  stp q0, q0, [sp, #96]
-; CHECK-NEXT:  stp q0, q0, [sp, #64]
-; CHECK-NEXT:  stp q0, q0, [sp, #32]
-; CHECK-NEXT:  stp q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #144
+; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #96]
+; CHECK-NEXT:    stp q0, q0, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #144
+; CHECK-NEXT:    ret
   %buf = alloca [128 x i8], align 1
   %cast = bitcast [128 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 128, i1 false)
@@ -375,17 +560,26 @@ define void @memset_128_stack() {
 
 define void @memset_256_stack() {
 ; CHECK-LABEL: memset_256_stack:
-; CHECK:       movi	v0.16b, #170
-; CHECK-NEXT:  mov	x0, sp
-; CHECK-NEXT:  stp	q0, q0, [sp, #224]
-; CHECK-NEXT:  stp	q0, q0, [sp, #192]
-; CHECK-NEXT:  stp	q0, q0, [sp, #160]
-; CHECK-NEXT:  stp	q0, q0, [sp, #128]
-; CHECK-NEXT:  stp	q0, q0, [sp, #96]
-; CHECK-NEXT:  stp	q0, q0, [sp, #64]
-; CHECK-NEXT:  stp	q0, q0, [sp, #32]
-; CHECK-NEXT:  stp	q0, q0, [sp]
-; CHECK-NEXT:  bl something
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #272
+; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    movi v0.16b, #170
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    stp q0, q0, [sp, #224]
+; CHECK-NEXT:    stp q0, q0, [sp, #192]
+; CHECK-NEXT:    stp q0, q0, [sp, #160]
+; CHECK-NEXT:    stp q0, q0, [sp, #128]
+; CHECK-NEXT:    stp q0, q0, [sp, #96]
+; CHECK-NEXT:    stp q0, q0, [sp, #64]
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    bl something
+; CHECK-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #272
+; CHECK-NEXT:    ret
   %buf = alloca [256 x i8], align 1
   %cast = bitcast [256 x i8]* %buf to i8*
   call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 256, i1 false)

diff  --git a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
index cfd3d6ad7bd8a..5cdcbf14292c3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -1,11 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s
 
 ; Small (16 bytes here) unaligned memcpy() should be a function call if
 ; strict-alignment is turned on.
 define void @t0(i8* %out, i8* %in) {
 ; CHECK-LABEL: t0:
-; CHECK:      mov w2, #16
-; CHECK-NEXT: bl _memcpy
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    mov w2, #16
+; CHECK-NEXT:    bl _memcpy
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false)
   ret void

diff  --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
index 5995de2942ea7..488ddbfc2c6bd 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
@@ -1,11 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s
 
 ; If %base < 96 then the sum will not wrap (in an unsigned sense), but "ldr w0,
 ; [x0, #-96]" would.
 define i32 @test_valid_wrap(i32 %base) {
 ; CHECK-LABEL: test_valid_wrap:
-; CHECK: sub w[[ADDR:[0-9]+]], w0, #96
-; CHECK: ldr w0, [x[[ADDR]]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub w8, w0, #96
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
 
   %newaddr = add nuw i32 %base, -96
   %ptr = inttoptr i32 %newaddr to i32*
@@ -15,7 +18,9 @@ define i32 @test_valid_wrap(i32 %base) {
 
 define i8 @test_valid_wrap_optimizable(i8* %base) {
 ; CHECK-LABEL: test_valid_wrap_optimizable:
-; CHECK: ldurb w0, [x0, #-96]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldurb w0, [x0, #-96]
+; CHECK-NEXT:    ret
 
   %newaddr = getelementptr inbounds i8, i8* %base, i32 -96
   %val = load i8, i8* %newaddr
@@ -24,7 +29,9 @@ define i8 @test_valid_wrap_optimizable(i8* %base) {
 
 define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) {
 ; CHECK-LABEL: test_valid_wrap_optimizable1:
-; CHECK: ldrb w0, [x0, w1, sxtw]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldrb w0, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
 
   %newaddr = getelementptr inbounds i8, i8* %base, i32 %offset
   %val = load i8, i8* %newaddr
@@ -34,9 +41,12 @@ define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) {
 ;
 define i8 @test_valid_wrap_optimizable2(i8* %base, i32 %offset) {
 ; CHECK-LABEL: test_valid_wrap_optimizable2:
-; CHECK: sxtw x[[OFFSET:[0-9]+]], w1
-; CHECK: mov w[[BASE:[0-9]+]], #-100
-; CHECK: ldrb w0, [x[[OFFSET]], x[[BASE]]]
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    mov w9, #-100
+; CHECK-NEXT:    ldrb w0, [x8, x9]
+; CHECK-NEXT:    ret
 
   %newaddr = getelementptr inbounds i8, i8* inttoptr(i32 -100 to i8*), i32 %offset
   %val = load i8, i8* %newaddr


        


More information about the llvm-commits mailing list