[llvm] c6ae7df - [AArch64][GlobalISel] Regenerate arm64-ld1.ll

Thu Sep 7 08:48:27 PDT 2023

Author: Vladislav Dzhidzhoev
Date: 2023-09-07T17:48:15+02:00
New Revision: c6ae7df999e841e23a1610dd453d88cb47ba4256

URL: https://github.com/llvm/llvm-project/commit/c6ae7df999e841e23a1610dd453d88cb47ba4256
DIFF: https://github.com/llvm/llvm-project/commit/c6ae7df999e841e23a1610dd453d88cb47ba4256.diff

LOG: [AArch64][GlobalISel] Regenerate arm64-ld1.ll

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/arm64-ld1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 161424eaf11e38..47fb3308175b02 100644

--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,33 +1,37 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
 
 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
 %struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>, <8 x i8>,  <8 x i8> }
 
 define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind {
-; CHECK-LABEL: ld2_8b
+; CHECK-LABEL: ld2_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.8b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
 ; and from the argument of the function also defined by ABI (i.e., x0)
-; CHECK: ld2.8b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x2_t  %tmp2
 }
 
 define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind {
-; CHECK-LABEL: ld3_8b
+; CHECK-LABEL: ld3_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x3_t  %tmp2
 }
 
 define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind {
-; CHECK-LABEL: ld4_8b
+; CHECK-LABEL: ld4_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x4_t  %tmp2
 }
@@ -41,28 +45,31 @@ declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr) nounwind r
 %struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>, <16 x i8>,  <16 x i8> }
 
 define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind {
-; CHECK-LABEL: ld2_16b
+; CHECK-LABEL: ld2_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.16b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.16b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
   ret %struct.__neon_int8x16x2_t  %tmp2
 }
 
 define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind {
-; CHECK-LABEL: ld3_16b
+; CHECK-LABEL: ld3_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
   ret %struct.__neon_int8x16x3_t  %tmp2
 }
 
 define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind {
-; CHECK-LABEL: ld4_16b
+; CHECK-LABEL: ld4_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
   ret %struct.__neon_int8x16x4_t  %tmp2
 }
@@ -76,28 +83,31 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr) nounwind
 %struct.__neon_int16x4x4_t = type { <4 x i16>,  <4 x i16>, <4 x i16>,  <4 x i16> }
 
 define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind {
-; CHECK-LABEL: ld2_4h
+; CHECK-LABEL: ld2_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.4h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.4h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x2_t  %tmp2
 }
 
 define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind {
-; CHECK-LABEL: ld3_4h
+; CHECK-LABEL: ld3_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x3_t  %tmp2
 }
 
 define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind {
-; CHECK-LABEL: ld4_4h
+; CHECK-LABEL: ld4_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x4_t  %tmp2
 }
@@ -111,28 +121,31 @@ declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr) nounwind
 %struct.__neon_int16x8x4_t = type { <8 x i16>,  <8 x i16>, <8 x i16>,  <8 x i16> }
 
 define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind {
-; CHECK-LABEL: ld2_8h
+; CHECK-LABEL: ld2_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.8h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.8h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x2_t  %tmp2
 }
 
 define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind {
-; CHECK-LABEL: ld3_8h
+; CHECK-LABEL: ld3_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x3_t %tmp2
 }
 
 define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind {
-; CHECK-LABEL: ld4_8h
+; CHECK-LABEL: ld4_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x4_t  %tmp2
 }
@@ -146,28 +159,31 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr) nounwind
 %struct.__neon_int32x2x4_t = type { <2 x i32>,  <2 x i32>, <2 x i32>,  <2 x i32> }
 
 define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) nounwind {
-; CHECK-LABEL: ld2_2s
+; CHECK-LABEL: ld2_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.2s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x2_t  %tmp2
 }
 
 define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind {
-; CHECK-LABEL: ld3_2s
+; CHECK-LABEL: ld3_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x3_t  %tmp2
 }
 
 define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind {
-; CHECK-LABEL: ld4_2s
+; CHECK-LABEL: ld4_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x4_t  %tmp2
 }
@@ -181,28 +197,31 @@ declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr) nounwind
 %struct.__neon_int32x4x4_t = type { <4 x i32>,  <4 x i32>, <4 x i32>,  <4 x i32> }
 
 define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind {
-; CHECK-LABEL: ld2_4s
+; CHECK-LABEL: ld2_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.4s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x2_t  %tmp2
 }
 
 define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind {
-; CHECK-LABEL: ld3_4s
+; CHECK-LABEL: ld3_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x3_t  %tmp2
 }
 
 define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind {
-; CHECK-LABEL: ld4_4s
+; CHECK-LABEL: ld4_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x4_t  %tmp2
 }
@@ -216,28 +235,31 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr) nounwind
 %struct.__neon_int64x2x4_t = type { <2 x i64>,  <2 x i64>, <2 x i64>,  <2 x i64> }
 
 define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind {
-; CHECK-LABEL: ld2_2d
+; CHECK-LABEL: ld2_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.2d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x2_t  %tmp2
 }
 
 define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind {
-; CHECK-LABEL: ld3_2d
+; CHECK-LABEL: ld3_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x3_t  %tmp2
 }
 
 define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind {
-; CHECK-LABEL: ld4_2d
+; CHECK-LABEL: ld4_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x4_t  %tmp2
 }
@@ -252,28 +274,31 @@ declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr) nounwind
 
 
 define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind {
-; CHECK-LABEL: ld2_1di64
+; CHECK-LABEL: ld2_1di64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x2_t  %tmp2
 }
 
 define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind {
-; CHECK-LABEL: ld3_1di64
+; CHECK-LABEL: ld3_1di64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x3_t  %tmp2
 }
 
 define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind {
-; CHECK-LABEL: ld4_1di64
+; CHECK-LABEL: ld4_1di64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x4_t  %tmp2
 }
@@ -289,28 +314,31 @@ declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr) nounwind
 
 
 define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld2_1df64
+; CHECK-LABEL: ld2_1df64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
 	ret %struct.__neon_float64x1x2_t  %tmp2
 }
 
 define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld3_1df64
+; CHECK-LABEL: ld3_1df64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
 	ret %struct.__neon_float64x1x3_t  %tmp2
 }
 
 define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld4_1df64
+; CHECK-LABEL: ld4_1df64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
 	ret %struct.__neon_float64x1x4_t  %tmp2
 }
@@ -322,27 +350,39 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi
 
 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_16b
-; CHECK: ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld2lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.b { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x2_t  %tmp2
 }
 
 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_16b
-; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld3lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x3_t  %tmp2
 }
 
 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_16b
-; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld4lane_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x4_t  %tmp2
 }
@@ -353,27 +393,39 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>
 
 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_8h
-; CHECK: ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld2lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.h { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x2_t  %tmp2
 }
 
 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_8h
-; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld3lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x3_t  %tmp2
 }
 
 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_8h
-; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld4lane_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x4_t  %tmp2
 }
@@ -384,27 +436,39 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>
 
 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_4s
-; CHECK: ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld2lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.s { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x2_t  %tmp2
 }
 
 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_4s
-; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld3lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x3_t  %tmp2
 }
 
 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_4s
-; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld4lane_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x4_t  %tmp2
 }
@@ -415,27 +479,39 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>
 
 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_2d
-; CHECK: ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld2lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    ld2.d { v0, v1 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x2_t  %tmp2
 }
 
 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_2d
-; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld3lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x3_t  %tmp2
 }
 
 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_2d
-; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld4lane_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x4_t  %tmp2
 }
@@ -445,10 +521,11 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
 
 define <8 x i8> @ld1r_8b(ptr %bar) {
-; CHECK: ld1r_8b
+; CHECK-LABEL: ld1r_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.8b { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8b { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
@@ -462,10 +539,11 @@ define <8 x i8> @ld1r_8b(ptr %bar) {
 }
 
 define <16 x i8> @ld1r_16b(ptr %bar) {
-; CHECK: ld1r_16b
+; CHECK-LABEL: ld1r_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.16b { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.16b { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
@@ -487,10 +565,11 @@ define <16 x i8> @ld1r_16b(ptr %bar) {
 }
 
 define <4 x i16> @ld1r_4h(ptr %bar) {
-; CHECK: ld1r_4h
+; CHECK-LABEL: ld1r_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.4h { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4h { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
@@ -500,10 +579,11 @@ define <4 x i16> @ld1r_4h(ptr %bar) {
 }
 
 define <8 x i16> @ld1r_8h(ptr %bar) {
-; CHECK: ld1r_8h
+; CHECK-LABEL: ld1r_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.8h { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8h { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
@@ -517,10 +597,11 @@ define <8 x i16> @ld1r_8h(ptr %bar) {
 }
 
 define <2 x i32> @ld1r_2s(ptr %bar) {
-; CHECK: ld1r_2s
+; CHECK-LABEL: ld1r_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
@@ -528,10 +609,11 @@ define <2 x i32> @ld1r_2s(ptr %bar) {
 }
 
 define <4 x i32> @ld1r_4s(ptr %bar) {
-; CHECK: ld1r_4s
+; CHECK-LABEL: ld1r_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
@@ -541,10 +623,11 @@ define <4 x i32> @ld1r_4s(ptr %bar) {
 }
 
 define <2 x i64> @ld1r_2d(ptr %bar) {
-; CHECK: ld1r_2d
+; CHECK-LABEL: ld1r_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i64, ptr %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
@@ -552,28 +635,31 @@ define <2 x i64> @ld1r_2d(ptr %bar) {
 }
 
 define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind {
-; CHECK: ld2r_8b
+; CHECK-LABEL: ld2r_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.8b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x2_t  %tmp2
 }
 
 define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind {
-; CHECK: ld3r_8b
+; CHECK-LABEL: ld3r_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x3_t  %tmp2
 }
 
 define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind {
-; CHECK: ld4r_8b
+; CHECK-LABEL: ld4r_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
 	ret %struct.__neon_int8x8x4_t  %tmp2
 }
@@ -583,28 +669,31 @@ declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind
 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly
 
 define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind {
-; CHECK: ld2r_16b
+; CHECK-LABEL: ld2r_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.16b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.16b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
 	ret %struct.__neon_int8x16x2_t  %tmp2
 }
 
 define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind {
-; CHECK: ld3r_16b
+; CHECK-LABEL: ld3r_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
 	ret %struct.__neon_int8x16x3_t  %tmp2
 }
 
 define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind {
-; CHECK: ld4r_16b
+; CHECK-LABEL: ld4r_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
 	ret %struct.__neon_int8x16x4_t  %tmp2
 }
@@ -614,28 +703,31 @@ declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwin
 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly
 
 define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind {
-; CHECK: ld2r_4h
+; CHECK-LABEL: ld2r_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.4h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x2_t  %tmp2
 }
 
 define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind {
-; CHECK: ld3r_4h
+; CHECK-LABEL: ld3r_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x3_t  %tmp2
 }
 
 define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind {
-; CHECK: ld4r_4h
+; CHECK-LABEL: ld4r_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
 	ret %struct.__neon_int16x4x4_t  %tmp2
 }
@@ -645,28 +737,31 @@ declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwin
 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly
 
 define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind {
-; CHECK: ld2r_8h
+; CHECK-LABEL: ld2r_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.8h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x2_t  %tmp2
 }
 
 define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind {
-; CHECK: ld3r_8h
+; CHECK-LABEL: ld3r_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x3_t  %tmp2
 }
 
 define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind {
-; CHECK: ld4r_8h
+; CHECK-LABEL: ld4r_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
   %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
   ret %struct.__neon_int16x8x4_t  %tmp2
 }
@@ -676,28 +771,31 @@ declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwin
 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly
 
 define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind {
-; CHECK: ld2r_2s
+; CHECK-LABEL: ld2r_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x2_t  %tmp2
 }
 
 define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind {
-; CHECK: ld3r_2s
+; CHECK-LABEL: ld3r_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x3_t  %tmp2
 }
 
 define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind {
-; CHECK: ld4r_2s
+; CHECK-LABEL: ld4r_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
 	ret %struct.__neon_int32x2x4_t  %tmp2
 }
@@ -707,28 +805,31 @@ declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwin
 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly
 
 define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind {
-; CHECK: ld2r_4s
+; CHECK-LABEL: ld2r_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x2_t  %tmp2
 }
 
 define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind {
-; CHECK: ld3r_4s
+; CHECK-LABEL: ld3r_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x3_t  %tmp2
 }
 
 define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind {
-; CHECK: ld4r_4s
+; CHECK-LABEL: ld4r_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
 	ret %struct.__neon_int32x4x4_t  %tmp2
 }
@@ -738,28 +839,31 @@ declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwin
 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly
 
 define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind {
-; CHECK: ld2r_1d
+; CHECK-LABEL: ld2r_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x2_t  %tmp2
 }
 
 define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind {
-; CHECK: ld3r_1d
+; CHECK-LABEL: ld3r_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x3_t  %tmp2
 }
 
 define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind {
-; CHECK: ld4r_1d
+; CHECK-LABEL: ld4r_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
 	ret %struct.__neon_int64x1x4_t  %tmp2
 }
@@ -769,28 +873,31 @@ declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwin
 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly
 
 define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind {
-; CHECK: ld2r_2d
+; CHECK-LABEL: ld2r_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x2_t  %tmp2
 }
 
 define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind {
-; CHECK: ld3r_2d
+; CHECK-LABEL: ld3r_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x3_t  %tmp2
 }
 
 define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind {
-; CHECK: ld4r_2d
+; CHECK-LABEL: ld4r_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
 	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
 	ret %struct.__neon_int64x2x4_t  %tmp2
 }
@@ -800,30 +907,33 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
 
 define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
-; CHECK-LABEL: ld1_16b
+; CHECK-LABEL: ld1_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.b { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
   ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-LABEL: ld1_8h
+; CHECK-LABEL: ld1_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
   ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
-; CHECK-LABEL: ld1_4s
+; CHECK-LABEL: ld1_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
   ret <4 x i32> %tmp2
@@ -831,19 +941,21 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
 
 define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_4s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
   ret <4 x float> %tmp2
 }
 
 define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
-; CHECK-LABEL: ld1_2d
+; CHECK-LABEL: ld1_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i64, ptr %bar
   %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
   ret <2 x i64> %tmp2
@@ -851,38 +963,46 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
 
 define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2d_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load double, ptr %bar
   %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
   ret <2 x double> %tmp2
 }
 
 define <1 x i64> @ld1_1d(ptr %p) {
-; CHECK-LABEL: ld1_1d
+; CHECK-LABEL: ld1_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr [[REG:d[0-9]+]], [x0]
-; CHECK-NEXT: ret
   %tmp = load <1 x i64>, ptr %p, align 8
   ret <1 x i64> %tmp
 }
 
 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
-; CHECK-LABEL: ld1_8b
+; CHECK-LABEL: ld1_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.b { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
   ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
-; CHECK-LABEL: ld1_4h
+; CHECK-LABEL: ld1_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
   ret <4 x i16> %tmp2
@@ -890,9 +1010,12 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
   ret <2 x i32> %tmp2
@@ -900,9 +1023,12 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
   ret <2 x float> %tmp2
@@ -911,13 +1037,14 @@ define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
 
 ; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
 define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
+; CHECK-LABEL: ld1r_2s_from_dup:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
+; CHECK-NEXT:    ld1r.2s { v1 }, [x1]
+; CHECK-NEXT:    usubl.8h v0, v0, v1
+; CHECK-NEXT:    str d0, [x2]
+; CHECK-NEXT:    ret
 entry:
-; CHECK: ld1r_2s_from_dup
-; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
-; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
-; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
-; CHECK-NEXT: str d[[RESREGNUM]], [x2]
-; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %a, align 4
   %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
   %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -938,11 +1065,12 @@ entry:
 
 ; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
 define <4 x float> @ld1r_4s_float(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_4s_float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_4s_float
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load float, ptr %x, align 4
   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
   %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
@@ -952,11 +1080,12 @@ entry:
 }
 
 define <2 x float> @ld1r_2s_float(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_2s_float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_2s_float
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load float, ptr %x, align 4
   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
   %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
@@ -964,11 +1093,12 @@ entry:
 }
 
 define <2 x double> @ld1r_2d_double(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_2d_double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_2d_double
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load double, ptr %x, align 4
   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
   %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
@@ -976,22 +1106,24 @@ entry:
 }
 
 define <1 x double> @ld1r_1d_double(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_1d_double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_1d_double
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
   %tmp = load double, ptr %x, align 4
   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
   ret <1 x double> %tmp1
 }
 
 define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_4s_float_shuff:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_4s_float_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load float, ptr %x, align 4
   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
   %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -999,11 +1131,12 @@ entry:
 }
 
 define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_2s_float_shuff:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_2s_float_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load float, ptr %x, align 4
   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
   %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -1011,11 +1144,12 @@ entry:
 }
 
 define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_2d_double_shuff:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_2d_double_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
   %tmp = load double, ptr %x, align 4
   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
   %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
@@ -1023,11 +1157,12 @@ entry:
 }
 
 define <1 x double> @ld1r_1d_double_shuff(ptr nocapture %x) {
+; CHECK-LABEL: ld1r_1d_double_shuff:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ld1r_1d_double_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
   %tmp = load double, ptr %x, align 4
   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
   %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
@@ -1047,42 +1182,54 @@ declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) noun
 
 define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
   ret %struct.__neon_int8x8x2_t %val
 }
 
 define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
   ret %struct.__neon_int16x4x2_t %val
 }
 
 define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
   ret %struct.__neon_int32x2x2_t %val
 }
 
 define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
   ret %struct.__neon_float32x2x2_t %val
 }
 
 define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
   ret %struct.__neon_int64x1x2_t %val
 }
 
 define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
   ret %struct.__neon_float64x1x2_t %val
 }
@@ -1105,42 +1252,54 @@ declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) noun
 
 define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
   ret %struct.__neon_int8x16x2_t %val
 }
 
 define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
   ret %struct.__neon_int16x8x2_t %val
 }
 
 define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
   ret %struct.__neon_int32x4x2_t %val
 }
 
 define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
   ret %struct.__neon_float32x4x2_t %val
 }
 
 define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
   ret %struct.__neon_int64x2x2_t %val
 }
 
 define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x2_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
   ret %struct.__neon_float64x2x2_t %val
 }
@@ -1154,42 +1313,54 @@ declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) noun
 
 define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
   ret %struct.__neon_int8x8x3_t %val
 }
 
 define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
   ret %struct.__neon_int16x4x3_t %val
 }
 
 define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
   ret %struct.__neon_int32x2x3_t %val
 }
 
 define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
   ret %struct.__neon_float32x2x3_t %val
 }
 
 define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
   ret %struct.__neon_int64x1x3_t %val
 }
 
 define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
   ret %struct.__neon_float64x1x3_t %val
 }
@@ -1203,42 +1374,54 @@ declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) noun
 
 define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr)
   ret %struct.__neon_int8x16x3_t %val
 }
 
 define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr)
   ret %struct.__neon_int16x8x3_t %val
 }
 
 define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr)
   ret %struct.__neon_int32x4x3_t %val
 }
 
 define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr)
   ret %struct.__neon_float32x4x3_t %val
 }
 
 define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr)
   ret %struct.__neon_int64x2x3_t %val
 }
 
 define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x3_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr)
   ret %struct.__neon_float64x2x3_t %val
 }
@@ -1252,42 +1435,54 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) noun
 
 define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr)
   ret %struct.__neon_int8x8x4_t %val
 }
 
 define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr)
   ret %struct.__neon_int16x4x4_t %val
 }
 
 define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr)
   ret %struct.__neon_int32x2x4_t %val
 }
 
 define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr)
   ret %struct.__neon_float32x2x4_t %val
 }
 
 define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr)
   ret %struct.__neon_int64x1x4_t %val
 }
 
 define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr)
   ret %struct.__neon_float64x1x4_t %val
 }
@@ -1301,42 +1496,54 @@ declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) noun
 
 define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr)
   ret %struct.__neon_int8x16x4_t %val
 }
 
 define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr)
   ret %struct.__neon_int16x8x4_t %val
 }
 
 define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr)
   ret %struct.__neon_int32x4x4_t %val
 }
 
 define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr)
   ret %struct.__neon_float32x4x4_t %val
 }
 
 define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr)
   ret %struct.__neon_int64x2x4_t %val
 }
 
 define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
 ; CHECK-LABEL: ld1_x4_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT:    ret
   %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr)
   ret %struct.__neon_float64x2x4_t %val
 }