[llvm] fadea44 - [NFC][SVE] Auto-generate CHECK lines for intrinsic codegen tests.

Paul Walker via llvm-commits <llvm-commits at lists.llvm.org>
Sun Jun 26 16:08:35 PDT 2022


Author: Paul Walker
Date: 2022-06-27T00:07:00+01:00
New Revision: fadea4413ecbfffa4d28ad8298e0628165b543f1

URL: https://github.com/llvm/llvm-project/commit/fadea4413ecbfffa4d28ad8298e0628165b543f1
DIFF: https://github.com/llvm/llvm-project/commit/fadea4413ecbfffa4d28ad8298e0628165b543f1.diff

LOG: [NFC][SVE] Auto-generate CHECK lines for intrinsic codegen tests.
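
For reference, CHECK lines in this style are produced by LLVM's
utils/update_llc_test_checks.py helper, which runs each test's RUN
line and captures the resulting assembly as FileCheck assertions.
A minimal sketch of a typical invocation, assuming it is run from
the top of an llvm-project checkout with a built llc on PATH or
passed explicitly (the build directory name "build" below is an
assumption; the test path is one of the files touched here):

    # Regenerate the CHECK lines for a single test file in place.
    # --llc-binary points at the llc to use; omit it if llc is on PATH.
    python3 llvm/utils/update_llc_test_checks.py \
        --llc-binary=build/bin/llc \
        llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll

Re-running the script after codegen changes keeps the assertions in
sync without hand-editing.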

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
    llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-bit-permutation.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-scaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll
index 461a860d81936..c12b7a43b65a7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-adr.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
 ; ADRB
@@ -6,8 +7,9 @@
 
 define <vscale x 4 x i32> @adrb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: adrb_i32:
-; CHECK: adr z0.s, [z0.s, z1.s]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.adrb.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -15,8 +17,9 @@ define <vscale x 4 x i32> @adrb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @adrb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: adrb_i64:
-; CHECK: adr z0.d, [z0.d, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.adrb.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -28,8 +31,9 @@ define <vscale x 2 x i64> @adrb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 4 x i32> @adrh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: adrh_i32:
-; CHECK: adr z0.s, [z0.s, z1.s, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.adrh.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @adrh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @adrh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: adrh_i64:
-; CHECK: adr z0.d, [z0.d, z1.d, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.adrh.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -50,8 +55,9 @@ define <vscale x 2 x i64> @adrh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 4 x i32> @adrw_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: adrw_i32:
-; CHECK: adr z0.s, [z0.s, z1.s, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.adrw.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -59,8 +65,9 @@ define <vscale x 4 x i32> @adrw_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @adrw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: adrw_i64:
-; CHECK: adr z0.d, [z0.d, z1.d, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.adrw.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -72,8 +79,9 @@ define <vscale x 2 x i64> @adrw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 4 x i32> @adrd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: adrd_i32:
-; CHECK: adr z0.s, [z0.s, z1.s, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.adrd.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -81,8 +89,9 @@ define <vscale x 4 x i32> @adrd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @adrd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: adrd_i64:
-; CHECK: adr z0.d, [z0.d, z1.d, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.adrd.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
index faccbcc9b3ebc..a3989a97dc988 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
 
 ;
 ; BFDOT
@@ -6,40 +7,45 @@
 
 define <vscale x 4 x float> @bfdot_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfdot_f32:
-; CHECK-NEXT:  bfdot z0.s, z1.h, z2.h
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfdot_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfdot_lane_0_f32:
-; CHECK-NEXT:  bfdot z0.s, z1.h, z2.h[0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfdot_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfdot_lane_1_f32:
-; CHECK-NEXT:  bfdot z0.s, z1.h, z2.h[1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfdot_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfdot_lane_2_f32:
-; CHECK-NEXT:  bfdot z0.s, z1.h, z2.h[2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfdot_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfdot_lane_3_f32:
-; CHECK-NEXT:  bfdot z0.s, z1.h, z2.h[3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
   ret <vscale x 4 x float> %out
 }
@@ -50,72 +56,81 @@ define <vscale x 4 x float> @bfdot_lane_3_f32(<vscale x 4 x float> %a, <vscale x
 
 define <vscale x 4 x float> @bfmlalb_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_0_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_1_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_2_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_3_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_4_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_4_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[4]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[4]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_5_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_5_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[5]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[5]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_6_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_6_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[6]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[6]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalb_lane_7_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalb_lane_7_f32:
-; CHECK-NEXT:  bfmlalb z0.s, z1.h, z2.h[7]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
   ret <vscale x 4 x float> %out
 }
@@ -126,72 +141,81 @@ define <vscale x 4 x float> @bfmlalb_lane_7_f32(<vscale x 4 x float> %a, <vscale
 
 define <vscale x 4 x float> @bfmlalt_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_0_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_1_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_2_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_3_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_4_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_4_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[4]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[4]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_5_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_5_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[5]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[5]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_6_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_6_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[6]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[6]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @bfmlalt_lane_7_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmlalt_lane_7_f32:
-; CHECK-NEXT:  bfmlalt z0.s, z1.h, z2.h[7]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
   ret <vscale x 4 x float> %out
 }
@@ -202,8 +226,9 @@ define <vscale x 4 x float> @bfmlalt_lane_7_f32(<vscale x 4 x float> %a, <vscale
 
 define <vscale x 4 x float> @bfmmla_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
 ; CHECK-LABEL: bfmmla_f32:
-; CHECK-NEXT:  bfmmla z0.s, z1.h, z2.h
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmmla z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
   ret <vscale x 4 x float> %out
 }
@@ -214,8 +239,9 @@ define <vscale x 4 x float> @bfmmla_f32(<vscale x 4 x float> %a, <vscale x 8 x b
 
 define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: fcvt_bf16_f32:
-; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
@@ -226,8 +252,9 @@ define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x
 
 define <vscale x 8 x bfloat> @fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: fcvtnt_bf16_f32:
-; CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
index 52ceec011420d..4bcdaded9c28b 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -5,88 +6,110 @@
 ; Testing prfop encodings
 ;
 define void @test_svprf_pldl1strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pldl1strm
-; CHECK: prfb pldl1strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pldl1strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 1)
   ret void
 }
 
 define void @test_svprf_pldl2keep(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pldl2keep
-; CHECK: prfb pldl2keep, p0, [x0]
+; CHECK-LABEL: test_svprf_pldl2keep:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl2keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 2)
   ret void
 }
 
 define void @test_svprf_pldl2strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pldl2strm
-; CHECK: prfb pldl2strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pldl2strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl2strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 3)
   ret void
 }
 
 define void @test_svprf_pldl3keep(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pldl3keep
-; CHECK: prfb pldl3keep, p0, [x0]
+; CHECK-LABEL: test_svprf_pldl3keep:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl3keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 4)
   ret void
 }
 
 define void @test_svprf_pldl3strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pldl3strm
-; CHECK: prfb pldl3strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pldl3strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl3strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 5)
   ret void
 }
 
 define void @test_svprf_pstl1keep(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl1keep
-; CHECK: prfb pstl1keep, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl1keep:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl1keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 8)
   ret void
 }
 
 define void @test_svprf_pstl1strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl1strm
-; CHECK: prfb pstl1strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl1strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl1strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 9)
   ret void
 }
 
 define void @test_svprf_pstl2keep(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl2keep
-; CHECK: prfb pstl2keep, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl2keep:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl2keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 10)
   ret void
 }
 
 define void @test_svprf_pstl2strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl2strm
-; CHECK: prfb pstl2strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl2strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl2strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 11)
   ret void
 }
 
 define void @test_svprf_pstl3keep(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl3keep
-; CHECK: prfb pstl3keep, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl3keep:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl3keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 12)
   ret void
 }
 
 define void @test_svprf_pstl3strm(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprf_pstl3strm
-; CHECK: prfb pstl3strm, p0, [x0]
+; CHECK-LABEL: test_svprf_pstl3strm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 13)
   ret void
@@ -97,8 +120,14 @@ entry:
 ;
 
 define void @test_svprf_vnum_under(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
-; CHECK-LABEL: test_svprf_vnum_under
-; CHECK-NOT: prfb pstl3strm, p0, [x0, #-33, mul vl]
+; CHECK-LABEL: test_svprf_vnum_under:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x9, #-528
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x8]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -33, i64 0
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
@@ -106,8 +135,10 @@ entry:
 }
 
 define void @test_svprf_vnum_min(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
-; CHECK-LABEL: test_svprf_vnum_min
-; CHECK: prfb pstl3strm, p0, [x0, #-32, mul vl]
+; CHECK-LABEL: test_svprf_vnum_min:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -32, i64 0
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
@@ -115,8 +146,14 @@ entry:
 }
 
 define void @test_svprf_vnum_over(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
-; CHECK-LABEL: test_svprf_vnum_over
-; CHECK-NOT: prfb pstl3strm, p0, [x0, #32, mul vl]
+; CHECK-LABEL: test_svprf_vnum_over:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov w9, #512
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x8]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 32, i64 0
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
@@ -124,8 +161,10 @@ entry:
 }
 
 define void @test_svprf_vnum_max(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
-; CHECK-LABEL: test_svprf_vnum_max
-; CHECK: prfb pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-LABEL: test_svprf_vnum_max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 31, i64 0
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
@@ -137,32 +176,40 @@ entry:
 ;
 
 define void @test_svprfb(<vscale x 16 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprfb
-; CHECK: prfb pldl1keep, p0, [x0]
+; CHECK-LABEL: test_svprfb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pldl1keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
   ret void
 }
 
 define void @test_svprfh(<vscale x 8 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprfh
-; CHECK: prfh pldl1keep, p0, [x0]
+; CHECK-LABEL: test_svprfh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfh pldl1keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %base, i32 0)
   ret void
 }
 
 define void @test_svprfw(<vscale x 4 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprfw
-; CHECK: prfw pldl1keep, p0, [x0]
+; CHECK-LABEL: test_svprfw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfw pldl1keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, i32 0)
   ret void
 }
 
 define void @test_svprfd(<vscale x 2 x i1> %pg, i8* %base) {
-; CHECK-LABEL: test_svprfd
-; CHECK: prfd pldl1keep, p0, [x0]
+; CHECK-LABEL: test_svprfd:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfd pldl1keep, p0, [x0]
+; CHECK-NEXT:    ret
 entry:
   tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, i32 0)
   ret void
@@ -174,8 +221,10 @@ entry:
 ; imm form of prfb is tested above
 
 define void @test_svprfh_vnum(<vscale x 8 x i1> %pg, <vscale x 8 x i16>* %base) {
-; CHECK-LABEL: test_svprfh_vnum
-; CHECK: prfh pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-LABEL: test_svprfh_vnum:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfh pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 31
   %addr = bitcast <vscale x 8 x i16>* %gep to i8*
@@ -184,8 +233,10 @@ entry:
 }
 
 define void @test_svprfw_vnum(<vscale x 4 x i1> %pg, <vscale x 4 x i32>* %base) {
-; CHECK-LABEL: test_svprfw_vnum
-; CHECK: prfw pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-LABEL: test_svprfw_vnum:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfw pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 31
   %addr = bitcast <vscale x 4 x i32>* %gep to i8*
@@ -194,8 +245,10 @@ entry:
 }
 
 define void @test_svprfd_vnum(<vscale x 2 x i1> %pg, <vscale x 2 x i64>* %base) {
-; CHECK-LABEL: test_svprfd_vnum
-; CHECK: prfd pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-LABEL: test_svprfd_vnum:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfd pstl3strm, p0, [x0, #31, mul vl]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 31
   %addr = bitcast <vscale x 2 x i64>* %gep to i8*
@@ -208,8 +261,10 @@ entry:
 ;
 
 define void @test_svprfb_ss(<vscale x 16 x i1> %pg, i8* %base, i64 %offset) {
-; CHECK-LABEL: test_svprfb_ss
-; CHECK: prfb pstl3strm, p0, [x0, x1]
+; CHECK-LABEL: test_svprfb_ss:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x1]
+; CHECK-NEXT:    ret
 entry:
   %addr = getelementptr i8, i8* %base, i64 %offset
   tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %addr, i32 13)
@@ -217,8 +272,10 @@ entry:
 }
 
 define void @test_svprfh_ss(<vscale x 8 x i1> %pg, i16* %base, i64 %offset) {
-; CHECK-LABEL: test_svprfh_ss
-; CHECK: prfh pstl3strm, p0, [x0, x1, lsl #1]
+; CHECK-LABEL: test_svprfh_ss:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfh pstl3strm, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr i16, i16* %base, i64 %offset
   %addr = bitcast i16* %gep to i8*
@@ -227,8 +284,10 @@ entry:
 }
 
 define void @test_svprfw_ss(<vscale x 4 x i1> %pg, i32* %base, i64 %offset) {
-; CHECK-LABEL: test_svprfw_ss
-; CHECK: prfw pstl3strm, p0, [x0, x1, lsl #2]
+; CHECK-LABEL: test_svprfw_ss:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfw pstl3strm, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr i32, i32* %base, i64 %offset
   %addr = bitcast i32* %gep to i8*
@@ -237,8 +296,10 @@ entry:
 }
 
 define void @test_svprfd_ss(<vscale x 2 x i1> %pg, i64* %base, i64 %offset) {
-; CHECK-LABEL: test_svprfd_ss
-; CHECK: prfd pstl3strm, p0, [x0, x1, lsl #3]
+; CHECK-LABEL: test_svprfd_ss:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    prfd pstl3strm, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 entry:
   %gep = getelementptr i64, i64* %base, i64 %offset
   %addr = bitcast i64* %gep to i8*

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
index ecb1c6bc00ef3..9e36403c73eb0 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x i16> @sxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sxtb_i16:
-; CHECK: sxtb z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
@@ -17,8 +19,9 @@ define <vscale x 8 x i16> @sxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @sxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sxtb_i32:
-; CHECK: sxtb z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -27,8 +30,9 @@ define <vscale x 4 x i32> @sxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @sxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sxtb_i64:
-; CHECK: sxtb z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -41,8 +45,9 @@ define <vscale x 2 x i64> @sxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 4 x i32> @sxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sxth_i32:
-; CHECK: sxth z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -51,8 +56,9 @@ define <vscale x 4 x i32> @sxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @sxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sxth_i64:
-; CHECK: sxth z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -65,8 +71,9 @@ define <vscale x 2 x i64> @sxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 2 x i64> @sxtw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sxtw_i64:
-; CHECK: sxtw z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -79,8 +86,9 @@ define <vscale x 2 x i64> @sxtw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 8 x i16> @uxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uxtb_i16:
-; CHECK: uxtb z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxtb z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
@@ -89,8 +97,9 @@ define <vscale x 8 x i16> @uxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @uxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uxtb_i32:
-; CHECK: uxtb z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxtb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -99,8 +108,9 @@ define <vscale x 4 x i32> @uxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @uxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uxtb_i64:
-; CHECK: uxtb z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxtb z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -113,8 +123,9 @@ define <vscale x 2 x i64> @uxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 4 x i32> @uxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uxth_i32:
-; CHECK: uxth z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxth z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -123,8 +134,9 @@ define <vscale x 4 x i32> @uxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @uxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uxth_i64:
-; CHECK: uxth z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxth z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -137,8 +149,9 @@ define <vscale x 2 x i64> @uxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 2 x i64> @uxtw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uxtw_i64:
-; CHECK: uxtw z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uxtw z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
index dbf3474ae07ae..2a2386eb0b4b3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @cls_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cls_i8:
-; CHECK: cls z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %b)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @cls_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg,
 
 define <vscale x 8 x i16> @cls_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cls_i16:
-; CHECK: cls z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %b)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @cls_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 4 x i32> @cls_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cls_i32:
-; CHECK: cls z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %b)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @cls_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @cls_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cls_i64:
-; CHECK: cls z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cls z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %b)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @cls_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
 
 define <vscale x 16 x i8> @clz_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clz_i8:
-; CHECK: clz z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %b)
@@ -61,8 +67,9 @@ define <vscale x 16 x i8> @clz_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg,
 
 define <vscale x 8 x i16> @clz_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clz_i16:
-; CHECK: clz z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %b)
@@ -71,8 +78,9 @@ define <vscale x 8 x i16> @clz_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 4 x i32> @clz_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clz_i32:
-; CHECK: clz z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %b)
@@ -81,8 +89,9 @@ define <vscale x 4 x i32> @clz_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @clz_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clz_i64:
-; CHECK: clz z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %b)
@@ -95,8 +104,9 @@ define <vscale x 2 x i64> @clz_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
 
 define <vscale x 16 x i8> @cnt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cnt_i8:
-; CHECK: cnt z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %b)
@@ -105,8 +115,9 @@ define <vscale x 16 x i8> @cnt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg,
 
 define <vscale x 8 x i16> @cnt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cnt_i16:
-; CHECK: cnt z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %b)
@@ -115,8 +126,9 @@ define <vscale x 8 x i16> @cnt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 4 x i32> @cnt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cnt_i32:
-; CHECK: cnt z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %b)
@@ -125,8 +137,9 @@ define <vscale x 4 x i32> @cnt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @cnt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cnt_i64:
-; CHECK: cnt z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %b)
@@ -135,8 +148,9 @@ define <vscale x 2 x i64> @cnt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
 
 define <vscale x 8 x i16> @cnt_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: cnt_f16:
-; CHECK: cnt z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x half> %b)
@@ -145,8 +159,9 @@ define <vscale x 8 x i16> @cnt_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: cnt_bf16:
-; CHECK: cnt z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x bfloat> %b)
@@ -155,8 +170,9 @@ define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: cnt_f32:
-; CHECK: cnt z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x float> %b)
@@ -165,8 +181,9 @@ define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @cnt_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: cnt_f64:
-; CHECK: cnt z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                <vscale x 2 x double> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
index 61c31ea678656..549cbd49cd4c5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -mattr=+sme -asm-verbose=1 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s
 
 ;
 ; SVCREATE2 (i8)
@@ -7,8 +8,12 @@
 
 define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s8_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB0_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -20,9 +25,13 @@ L2:
 
 define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s8_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB1_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -38,8 +47,12 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB2_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB2_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -51,9 +64,13 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s16_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB3_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -69,8 +86,12 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB4_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB4_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -82,9 +103,13 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f16_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB5_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB5_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -100,8 +125,12 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
 ; CHECK-LABEL: test_svcreate2_bf16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB6_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB6_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -113,9 +142,13 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
 ; CHECK-LABEL: test_svcreate2_bf16_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB7_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB7_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -131,8 +164,12 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB8_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB8_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -144,9 +181,13 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s32_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB9_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB9_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -162,8 +203,12 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB10_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB10_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -175,9 +220,13 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f32_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB11_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB11_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -193,8 +242,12 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB12_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB12_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -206,9 +259,13 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s64_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB13_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB13_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -224,8 +281,12 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB14_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB14_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -237,9 +298,13 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f64_vec1:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB15_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_2: // %L2
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
   br i1 %p, label %L1, label %L2
 L1:
@@ -255,8 +320,12 @@ L2:
 
 define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s8_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB16_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB16_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -268,9 +337,13 @@ L2:
 
 define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s8_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB17_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB17_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -286,8 +359,12 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB18_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB18_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -299,9 +376,13 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s16_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB19_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB19_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -317,8 +398,12 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB20_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB20_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -330,9 +415,13 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f16_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB21_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB21_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -348,8 +437,12 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
 ; CHECK-LABEL: test_svcreate3_bf16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB22_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB22_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -361,9 +454,13 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
 ; CHECK-LABEL: test_svcreate3_bf16_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB23_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB23_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -379,8 +476,12 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB24_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB24_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -392,9 +493,13 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s32_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB25_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB25_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -410,8 +515,12 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB26_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB26_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -423,9 +532,13 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f32_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB27_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB27_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -441,8 +554,12 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB28_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB28_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -454,9 +571,13 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s64_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB29_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB29_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -472,8 +593,12 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB30_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB30_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -485,9 +610,13 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f64_vec2:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB31_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB31_2: // %L2
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
   br i1 %p, label %L1, label %L2
 L1:
@@ -503,8 +632,12 @@ L2:
 
 define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s8_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB32_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB32_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -516,9 +649,13 @@ L2:
 
 define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s8_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB33_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB33_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -534,8 +671,12 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB34_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB34_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -547,9 +688,13 @@ L2:
 
 define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s16_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB35_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB35_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -565,8 +710,12 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB36_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB36_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -578,9 +727,13 @@ L2:
 
 define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f16_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB37_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB37_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -596,8 +749,12 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
 ; CHECK-LABEL: test_svcreate4_bf16_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB38_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB38_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -609,9 +766,13 @@ L2:
 
 define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
 ; CHECK-LABEL: test_svcreate4_bf16_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB39_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB39_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -627,8 +788,12 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB40_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB40_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -640,9 +805,13 @@ L2:
 
 define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s32_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB41_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB41_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -658,8 +827,12 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f32_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB42_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB42_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -671,9 +844,13 @@ L2:
 
 define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f32_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB43_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB43_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -689,8 +866,12 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB44_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB44_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -702,9 +883,13 @@ L2:
 
 define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s64_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB45_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB45_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -720,8 +905,12 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f64_vec0:
-; CHECK: // %L2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB46_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    // implicit-def: $z0
+; CHECK-NEXT:  .LBB46_2: // %L2
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
   br i1 %p, label %L1, label %L2
 L1:
@@ -733,9 +922,13 @@ L2:
 
 define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f64_vec3:
-; CHECK: // %L2
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB47_2
+; CHECK-NEXT:  // %bb.1: // %common.ret
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB47_2: // %L2
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
   br i1 %p, label %L1, label %L2
 L1:

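The create-tuple assertions above were regenerated rather than hand-written, which is where the expanded tbz/common.ret/implicit-def check lines come from. As a rough sketch of the regeneration step (the build directory name and the --llc-binary usage are assumptions for illustration, not taken from the patch), one would run something like:

  # Rerun every RUN line in the test and rewrite its CHECK blocks from fresh llc output.
  llvm/utils/update_llc_test_checks.py \
      --llc-binary=build/bin/llc \
      llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll

The script rewrites the CHECK/CHECK-NEXT bodies from the assembly each RUN line produces, which is consistent with the RUN lines no longer forcing -asm-verbose: the generated checks rely on the verbose-assembly block comments such as "// %bb.0:".
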
diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
index a4e24dcb6ab40..bf113a7cabc15 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
 ; Unpredicated dup instruction (which is an alias for mov):
@@ -8,144 +9,169 @@
 
 define <vscale x 16 x i8> @dup_i8(i8 %b) {
 ; CHECK-LABEL: dup_i8:
-; CHECK: mov z0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 16 x i8> @dup_imm_i8() {
 ; CHECK-LABEL: dup_imm_i8:
-; CHECK: mov z0.b, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, #16 // =0x10
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @dup_i16(i16 %b) {
 ; CHECK-LABEL: dup_i16:
-; CHECK: mov z0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
 ; CHECK-LABEL: dup_imm_i16:
-; CHECK: mov z0.h, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #16 // =0x10
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @dup_i32(i32 %b) {
 ; CHECK-LABEL: dup_i32:
-; CHECK: mov z0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
 ; CHECK-LABEL: dup_imm_i32:
-; CHECK: mov z0.s, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #16 // =0x10
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @dup_i64(i64 %b) {
 ; CHECK-LABEL: dup_i64:
-; CHECK: mov z0.d, x0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
   ret <vscale x 2 x i64> %out
 }
 
 define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
 ; CHECK-LABEL: dup_imm_i64:
-; CHECK: mov z0.d, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #16 // =0x10
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
   ret <vscale x 2 x i64> %out
 }
 
 define <vscale x 8 x half> @dup_f16(half %b) {
 ; CHECK-LABEL: dup_f16:
-; CHECK: mov z0.h, h0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
 ; CHECK-LABEL: dup_bf16:
-; CHECK: mov z0.h, h0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 8 x half> @dup_imm_f16(half %b) {
 ; CHECK-LABEL: dup_imm_f16:
-; CHECK: mov z0.h, #16.00000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov z0.h, #16.00000000
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 4 x float> @dup_f32(float %b) {
 ; CHECK-LABEL: dup_f32:
-; CHECK: mov z0.s, s0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 4 x float> @dup_imm_f32(float %b) {
 ; CHECK-LABEL: dup_imm_f32:
-; CHECK: mov z0.s, #16.00000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov z0.s, #16.00000000
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @dup_f64(double %b) {
 ; CHECK-LABEL: dup_f64:
-; CHECK: mov z0.d, d0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
   ret <vscale x 2 x double> %out
 }
 
 define <vscale x 2 x double> @dup_imm_f64(double %b) {
 ; CHECK-LABEL: dup_imm_f64:
-; CHECK: mov z0.d, #16.00000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov z0.d, #16.00000000
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
   ret <vscale x 2 x double> %out
 }
 
 define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
 ; CHECK-LABEL: dup_fmov_imm_f32_2:
-; CHECK: mov w8, #1109917696
-; CHECK-NEXT: mov z0.s, w8
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1109917696
+; CHECK-NEXT:    mov z0.s, w8
+; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
   ret <vscale x 2 x float> %out
 }
 
 define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
 ; CHECK-LABEL: dup_fmov_imm_f32_4:
-; CHECK: mov w8, #1109917696
-; CHECK-NEXT: mov z0.s, w8
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1109917696
+; CHECK-NEXT:    mov z0.s, w8
+; CHECK-NEXT:    ret
   %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
 ; CHECK-LABEL: dup_fmov_imm_f64_2:
-; CHECK: mov x8, #4631107791820423168
-; CHECK-NEXT: mov z0.d, x8
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4631107791820423168
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
   ret <vscale x 2 x double> %out
 }

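Because the dup-x RUN lines also drop -verify-machineinstrs and -asm-verbose=0, a quick sanity step is to feed the regenerated files back through lit from a configured build tree. A minimal sketch, assuming the build directory is named build (that name is an assumption):

  # Run just the updated tests verbosely to confirm the new CHECK lines still pass.
  build/bin/llvm-lit -v \
      llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll \
      llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
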
diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
index a86da9594a212..aa49708730df8 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; LDFF1H
 define <vscale x 4 x i32> @gldff1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1h_s_uxtw_index:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -20,8 +22,9 @@ define <vscale x 4 x i32> @gldff1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %bas
 
 define <vscale x 4 x i32> @gldff1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1h_s_sxtw_index:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -31,8 +34,9 @@ define <vscale x 4 x i32> @gldff1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %bas
 
 define <vscale x 2 x i64> @gldff1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1h_d_uxtw_index:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -42,8 +46,9 @@ define <vscale x 2 x i64> @gldff1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %bas
 
 define <vscale x 2 x i64> @gldff1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1h_d_sxtw_index:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gldff1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %bas
 ; LDFF1W
 define <vscale x 4 x i32> @gldff1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_uxtw_index:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gldff1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %bas
 
 define <vscale x 4 x i32> @gldff1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_sxtw_index:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -74,8 +81,9 @@ define <vscale x 4 x i32> @gldff1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %bas
 
 define <vscale x 2 x i64> @gldff1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1w_d_uxtw_index:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -85,8 +93,9 @@ define <vscale x 2 x i64> @gldff1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %bas
 
 define <vscale x 2 x i64> @gldff1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1w_d_sxtw_index:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gldff1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %bas
 
 define <vscale x 4 x float> @gldff1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_uxtw_index_float:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                       float* %base,
                                                                                       <vscale x 4 x i32> %b)
@@ -106,8 +116,9 @@ define <vscale x 4 x float> @gldff1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, f
 
 define <vscale x 4 x float> @gldff1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_sxtw_index_float:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                       float* %base,
                                                                                       <vscale x 4 x i32> %b)
@@ -117,8 +128,9 @@ define <vscale x 4 x float> @gldff1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, f
 ; LDFF1D
 define <vscale x 2 x i64> @gldff1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_s_uxtw_index:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                     i64* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -127,8 +139,9 @@ define <vscale x 2 x i64> @gldff1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %bas
 
 define <vscale x 2 x i64> @gldff1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_sxtw_index:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                     i64* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -137,8 +150,9 @@ define <vscale x 2 x i64> @gldff1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base,
 
 define <vscale x 2 x double> @gldff1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_uxtw_index_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                        double* %base,
                                                                                        <vscale x 2 x i32> %b)
@@ -147,8 +161,9 @@ define <vscale x 2 x double> @gldff1d_uxtw_index_double(<vscale x 2 x i1> %pg, d
 
 define <vscale x 2 x double> @gldff1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_sxtw_index_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                        double* %base,
                                                                                        <vscale x 2 x i32> %b)
@@ -164,8 +179,9 @@ define <vscale x 2 x double> @gldff1d_sxtw_index_double(<vscale x 2 x i1> %pg, d
 ; LDFF1SH
 define <vscale x 4 x i32> @gldff1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_s_uxtw_index:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -175,8 +191,9 @@ define <vscale x 4 x i32> @gldff1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %ba
 
 define <vscale x 4 x i32> @gldff1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_s_sxtw_index:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -186,8 +203,9 @@ define <vscale x 4 x i32> @gldff1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %ba
 
 define <vscale x 2 x i64> @gldff1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_d_uxtw_index:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -197,8 +215,9 @@ define <vscale x 2 x i64> @gldff1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %ba
 
 define <vscale x 2 x i64> @gldff1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_d_sxtw_index:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                     i16* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -209,8 +228,9 @@ define <vscale x 2 x i64> @gldff1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %ba
 ; LDFF1SW
 define <vscale x 2 x i64> @gldff1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sw_d_uxtw_index:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 2 x i32> %b)
@@ -220,8 +240,9 @@ define <vscale x 2 x i64> @gldff1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %ba
 
 define <vscale x 2 x i64> @gldff1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sw_d_sxtw_index:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                     i32* %base,
                                                                                     <vscale x 2 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
index 012812fb22b06..b076f2e007baa 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; LDFF1B
 define <vscale x 4 x i32> @gldff1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1b_s_uxtw:
-; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -20,8 +22,9 @@ define <vscale x 4 x i32> @gldff1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vsc
 
 define <vscale x 4 x i32> @gldff1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1b_s_sxtw:
-; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -31,8 +34,9 @@ define <vscale x 4 x i32> @gldff1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vsc
 
 define <vscale x 2 x i64> @gldff1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1b_d_uxtw:
-; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -42,8 +46,9 @@ define <vscale x 2 x i64> @gldff1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vsc
 
 define <vscale x 2 x i64> @gldff1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1b_d_sxtw:
-; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gldff1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vsc
 ; LDFF1H
 define <vscale x 4 x i32> @gldff1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1h_s_uxtw:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -65,8 +71,9 @@ define <vscale x 4 x i32> @gldff1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vs
 
 define <vscale x 4 x i32> @gldff1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1h_s_sxtw:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -76,8 +83,9 @@ define <vscale x 4 x i32> @gldff1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vs
 
 define <vscale x 2 x i64> @gldff1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1h_d_uxtw:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -87,8 +95,9 @@ define <vscale x 2 x i64> @gldff1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vs
 
 define <vscale x 2 x i64> @gldff1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1h_d_sxtw:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -99,8 +108,9 @@ define <vscale x 2 x i64> @gldff1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vs
 ; LDFF1W
 define <vscale x 4 x i32> @gldff1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_uxtw:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -109,8 +119,9 @@ define <vscale x 4 x i32> @gldff1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vs
 
 define <vscale x 4 x i32> @gldff1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_sxtw:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -119,8 +130,9 @@ define <vscale x 4 x i32> @gldff1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vs
 
 define <vscale x 2 x i64> @gldff1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1w_d_uxtw:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -130,8 +142,9 @@ define <vscale x 2 x i64> @gldff1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vs
 
 define <vscale x 2 x i64> @gldff1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1w_d_sxtw:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -141,8 +154,9 @@ define <vscale x 2 x i64> @gldff1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vs
 
 define <vscale x 4 x float> @gldff1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_uxtw_float:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                 float* %base,
                                                                                 <vscale x 4 x i32> %b)
@@ -151,8 +165,9 @@ define <vscale x 4 x float> @gldff1w_s_uxtw_float(<vscale x 4 x i1> %pg, float*
 
 define <vscale x 4 x float> @gldff1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1w_s_sxtw_float:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                 float* %base,
                                                                                 <vscale x 4 x i32> %b)
@@ -162,8 +177,9 @@ define <vscale x 4 x float> @gldff1w_s_sxtw_float(<vscale x 4 x i1> %pg, float*
 ; LDFF1D
 define <vscale x 2 x i64> @gldff1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_d_uxtw:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                               i64* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -172,8 +188,9 @@ define <vscale x 2 x i64> @gldff1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vs
 
 define <vscale x 2 x i64> @gldff1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_d_sxtw:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                               i64* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -182,8 +199,9 @@ define <vscale x 2 x i64> @gldff1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vs
 
 define <vscale x 2 x double> @gldff1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_d_uxtw_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                  double* %base,
                                                                                  <vscale x 2 x i32> %b)
@@ -192,8 +210,9 @@ define <vscale x 2 x double> @gldff1d_d_uxtw_double(<vscale x 2 x i1> %pg, doubl
 
 define <vscale x 2 x double> @gldff1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1d_d_sxtw_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                  double* %base,
                                                                                  <vscale x 2 x i32> %b)
@@ -209,8 +228,9 @@ define <vscale x 2 x double> @gldff1d_d_sxtw_double(<vscale x 2 x i1> %pg, doubl
 ; LDFF1SB
 define <vscale x 4 x i32> @gldff1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sb_s_uxtw:
-; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -220,8 +240,9 @@ define <vscale x 4 x i32> @gldff1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vs
 
 define <vscale x 4 x i32> @gldff1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sb_s_sxtw:
-; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -231,8 +252,9 @@ define <vscale x 4 x i32> @gldff1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vs
 
 define <vscale x 2 x i64> @gldff1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sb_d_uxtw:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -242,8 +264,9 @@ define <vscale x 2 x i64> @gldff1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vs
 
 define <vscale x 2 x i64> @gldff1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sb_d_sxtw:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -254,8 +277,9 @@ define <vscale x 2 x i64> @gldff1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vs
 ; LDFF1SH
 define <vscale x 4 x i32> @gldff1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_s_uxtw:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -265,8 +289,9 @@ define <vscale x 4 x i32> @gldff1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <v
 
 define <vscale x 4 x i32> @gldff1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_s_sxtw:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -276,8 +301,9 @@ define <vscale x 4 x i32> @gldff1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <v
 
 define <vscale x 2 x i64> @gldff1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_d_uxtw:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -287,8 +313,9 @@ define <vscale x 2 x i64> @gldff1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <v
 
 define <vscale x 2 x i64> @gldff1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sh_d_sxtw:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -299,8 +326,9 @@ define <vscale x 2 x i64> @gldff1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <v
 ; LDFF1SW
 define <vscale x 2 x i64> @gldff1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sw_d_uxtw:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 2 x i32> %b)
@@ -310,8 +338,9 @@ define <vscale x 2 x i64> @gldff1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <v
 
 define <vscale x 2 x i64> @gldff1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gldff1sw_d_sxtw:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 2 x i32> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
index 4d52673560817..a1f842769dae5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 ;
 
 define <vscale x 2 x i64> @gldff1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1h_index
-; CHECK:	    ldff1h	{ z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1h_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                i16* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -17,9 +19,10 @@ define <vscale x 2 x i64> @gldff1h_index(<vscale x 2 x i1> %pg, i16* %base, <vsc
 }
 
 define <vscale x 2 x i64> @gldff1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1w_index
-; CHECK:	    ldff1w	{ z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1w_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                i32* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -28,9 +31,10 @@ define <vscale x 2 x i64> @gldff1w_index(<vscale x 2 x i1> %pg, i32* %base, <vsc
 }
 
 define <vscale x 2 x i64> @gldff1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1d_index
-; CHECK:	    ldff1d	{ z0.d }, p0/z, [x0, z0.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1d_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                i64* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -38,9 +42,10 @@ define <vscale x 2 x i64> @gldff1d_index(<vscale x 2 x i1> %pg, i64* %base, <vsc
 }
 
 define <vscale x 2 x double> @gldff1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1d_index_double
-; CHECK:	    ldff1d	{ z0.d }, p0/z, [x0, z0.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1d_index_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                   double* %base,
                                                                                   <vscale x 2 x i64> %b)
@@ -53,9 +58,10 @@ define <vscale x 2 x double> @gldff1d_index_double(<vscale x 2 x i1> %pg, double
 ;
 
 define <vscale x 2 x i64> @gldff1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1sh_index
-; CHECK:	    ldff1sh	{ z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1sh_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                i16* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -64,9 +70,10 @@ define <vscale x 2 x i64> @gldff1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vs
 }
 
 define <vscale x 2 x i64> @gldff1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldff1sw_index
-; CHECK:	    ldff1sw	{ z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gldff1sw_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                i32* %base,
                                                                                <vscale x 2 x i64> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
index 570bac58cc9ac..140747340ab17 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -7,8 +8,9 @@
 
 define <vscale x 2 x i64> @gldff1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1b_d:
-; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                        i8* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -18,8 +20,9 @@ define <vscale x 2 x i64> @gldff1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x
 
 define <vscale x 2 x i64> @gldff1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1h_d:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                          i16* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -29,8 +32,9 @@ define <vscale x 2 x i64> @gldff1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale
 
 define <vscale x 2 x i64> @gldff1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gldff1w_d:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                          i32* %base,
                                                                          <vscale x 2 x i64> %offsets)
@@ -40,8 +44,9 @@ define <vscale x 2 x i64> @gldff1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale
 
 define <vscale x 2 x i64> @gldff1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1d_d:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> %pg,
                                                                          i64* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -50,8 +55,9 @@ define <vscale x 2 x i64> @gldff1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale
 
 define <vscale x 2 x double> @gldff1d_d_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1d_d_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> %pg,
                                                                             double* %base,
                                                                             <vscale x 2 x i64> %b)
@@ -65,8 +71,9 @@ define <vscale x 2 x double> @gldff1d_d_double(<vscale x 2 x i1> %pg, double* %b
 
 define <vscale x 2 x i64> @gldff1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1sb_d:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                        i8* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -76,8 +83,9 @@ define <vscale x 2 x i64> @gldff1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale
 
 define <vscale x 2 x i64> @gldff1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldff1sh_d:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                          i16* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -87,8 +95,9 @@ define <vscale x 2 x i64> @gldff1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale
 
 define <vscale x 2 x i64> @gldff1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gldff1sw_d:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                          i32* %base,
                                                                          <vscale x 2 x i64> %offsets)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
index 5cb887932eff6..c7f0cd2b57481 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; LDFF1B
 define <vscale x 4 x i32> @gldff1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1b_s_imm_offset:
-; CHECK: ldff1b { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 16)
@@ -19,8 +21,9 @@ define <vscale x 4 x i32> @gldff1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x
 
 define <vscale x 2 x i64> @gldff1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1b_d_imm_offset:
-; CHECK: ldff1b { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -31,8 +34,9 @@ define <vscale x 2 x i64> @gldff1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 ; LDFF1H
 define <vscale x 4 x i32> @gldff1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1h_s_imm_offset:
-; CHECK: ldff1h { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 16)
@@ -42,8 +46,9 @@ define <vscale x 4 x i32> @gldff1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x
 
 define <vscale x 2 x i64> @gldff1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1h_d_imm_offset:
-; CHECK: ldff1h { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 16)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gldff1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 ; LDFF1W
 define <vscale x 4 x i32> @gldff1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1w_s_imm_offset:
-; CHECK: ldff1w { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 16)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gldff1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x
 
 define <vscale x 2 x i64> @gldff1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1w_d_imm_offset:
-; CHECK: ldff1w { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 16)
@@ -75,8 +82,9 @@ define <vscale x 2 x i64> @gldff1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 
 define <vscale x 4 x float> @gldff1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1w_s_imm_offset_float:
-; CHECK: ldff1w { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                  <vscale x 4 x i32> %base,
                                                                                                  i64 16)
@@ -86,8 +94,9 @@ define <vscale x 4 x float> @gldff1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <
 ; LDFF1D
 define <vscale x 2 x i64> @gldff1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1d_d_imm_offset:
-; CHECK: ldff1d { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 16)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gldff1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 
 define <vscale x 2 x double> @gldff1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1d_d_imm_offset_double:
-; CHECK: ldff1d { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                   <vscale x 2 x i64> %base,
                                                                                                   i64 16)
@@ -112,8 +122,9 @@ define <vscale x 2 x double> @gldff1d_d_imm_offset_double(<vscale x 2 x i1> %pg,
 ; LDFF1SB
 define <vscale x 4 x i32> @gldff1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1sb_s_imm_offset:
-; CHECK: ldff1sb { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 16)
@@ -123,8 +134,9 @@ define <vscale x 4 x i32> @gldff1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gldff1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sb_d_imm_offset:
-; CHECK: ldff1sb { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -135,8 +147,9 @@ define <vscale x 2 x i64> @gldff1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale
 ; LDFF1SH
 define <vscale x 4 x i32> @gldff1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1sh_s_imm_offset:
-; CHECK: ldff1sh { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 16)
@@ -146,8 +159,9 @@ define <vscale x 4 x i32> @gldff1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gldff1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sh_d_imm_offset:
-; CHECK: ldff1sh { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 16)
@@ -158,8 +172,9 @@ define <vscale x 2 x i64> @gldff1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale
 ; LDFF1SW
 define <vscale x 2 x i64> @gldff1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sw_d_imm_offset:
-; CHECK: ldff1sw { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 16)
@@ -175,9 +190,10 @@ define <vscale x 2 x i64> @gldff1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale
 ; LDFF1B
 define <vscale x 4 x i32> @gldff1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1b_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ldff1b { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 32)
@@ -187,9 +203,10 @@ define <vscale x 4 x i32> @gldff1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @gldff1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1b_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ldff1b { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 32)
@@ -200,9 +217,10 @@ define <vscale x 2 x i64> @gldff1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %
 ; LDFF1H
 define <vscale x 4 x i32> @gldff1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1h_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ldff1h { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 63)
@@ -212,9 +230,10 @@ define <vscale x 4 x i32> @gldff1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @gldff1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1h_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ldff1h { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 63)
@@ -225,9 +244,10 @@ define <vscale x 2 x i64> @gldff1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %
 ; LDFF1W
 define <vscale x 4 x i32> @gldff1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 125)
@@ -236,9 +256,10 @@ define <vscale x 4 x i32> @gldff1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @gldff1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1w_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ldff1w { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 125)
@@ -248,9 +269,10 @@ define <vscale x 2 x i64> @gldff1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %
 
 define <vscale x 4 x float> @gldff1w_s_imm_offset_out_of_range_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range_float:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                  <vscale x 4 x i32> %base,
                                                                                                  i64 125)
@@ -260,9 +282,10 @@ define <vscale x 4 x float> @gldff1w_s_imm_offset_out_of_range_float(<vscale x 4
 ; LDFF1D
 define <vscale x 2 x i64> @gldff1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: ldff1d { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 249)
@@ -271,9 +294,10 @@ define <vscale x 2 x i64> @gldff1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %
 
 define <vscale x 2 x double> @gldff1d_d_imm_offset_out_of_range_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range_double:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: ldff1d { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                   <vscale x 2 x i64> %base,
                                                                                                   i64 249)
@@ -288,9 +312,10 @@ define <vscale x 2 x double> @gldff1d_d_imm_offset_out_of_range_double(<vscale x
 ; LDFF1SB
 define <vscale x 4 x i32> @gldff1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1sb_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 32)
@@ -300,9 +325,10 @@ define <vscale x 4 x i32> @gldff1sb_s_imm_offset_out_of_range(<vscale x 4 x i1>
 
 define <vscale x 2 x i64> @gldff1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sb_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ldff1sb { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 32)
@@ -313,9 +339,10 @@ define <vscale x 2 x i64> @gldff1sb_d_imm_offset_out_of_range(<vscale x 2 x i1>
 ; LDFF1SH
 define <vscale x 4 x i32> @gldff1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gldff1sh_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ldff1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 63)
@@ -325,9 +352,10 @@ define <vscale x 4 x i32> @gldff1sh_s_imm_offset_out_of_range(<vscale x 4 x i1>
 
 define <vscale x 2 x i64> @gldff1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sh_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ldff1sh { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 63)
@@ -338,9 +366,10 @@ define <vscale x 2 x i64> @gldff1sh_d_imm_offset_out_of_range(<vscale x 2 x i1>
 ; LDFF1SW
 define <vscale x 2 x i64> @gldff1sw_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gldff1sw_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ldff1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 125)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
index 4b9fba9dc2756..523249805e140 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; LDFF1B
 define <vscale x 4 x i32> @gldff1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1b_s_scalar_offset:
-; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 %offset)
@@ -19,8 +21,9 @@ define <vscale x 4 x i32> @gldff1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscal
 
 define <vscale x 2 x i64> @gldff1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1b_d_scalar_offset:
-; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -31,8 +34,9 @@ define <vscale x 2 x i64> @gldff1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscal
 ; LDFF1H
 define <vscale x 4 x i32> @gldff1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1h_s_scalar_offset:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 %offset)
@@ -42,8 +46,9 @@ define <vscale x 4 x i32> @gldff1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscal
 
 define <vscale x 2 x i64> @gldff1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1h_d_scalar_offset:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gldff1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscal
 ; LDFF1W
 define <vscale x 4 x i32> @gldff1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1w_s_scalar_offset:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 %offset)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gldff1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscal
 
 define <vscale x 2 x i64> @gldff1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1w_d_scalar_offset:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)
@@ -75,8 +82,9 @@ define <vscale x 2 x i64> @gldff1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscal
 
 define <vscale x 4 x float> @gldff1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1w_s_scalar_offset_float:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                  <vscale x 4 x i32> %base,
                                                                                                  i64 %offset)
@@ -86,8 +94,9 @@ define <vscale x 4 x float> @gldff1w_s_scalar_offset_float(<vscale x 4 x i1> %pg
 ; LDFF1D
 define <vscale x 2 x i64> @gldff1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1d_d_scalar_offset:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gldff1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscal
 
 define <vscale x 2 x double> @gldff1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1d_d_scalar_offset_double:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                   <vscale x 2 x i64> %base,
                                                                                                   i64 %offset)
@@ -111,8 +121,9 @@ define <vscale x 2 x double> @gldff1d_d_scalar_offset_double(<vscale x 2 x i1> %
 ; LDFF1SB
 define <vscale x 4 x i32> @gldff1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1sb_s_scalar_offset:
-; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 %offset)
@@ -122,8 +133,9 @@ define <vscale x 4 x i32> @gldff1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vsca
 
 define <vscale x 2 x i64> @gldff1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1sb_d_scalar_offset:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -134,8 +146,9 @@ define <vscale x 2 x i64> @gldff1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vsca
 ; LDFF1SH
 define <vscale x 4 x i32> @gldff1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1sh_s_scalar_offset:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 %offset)
@@ -145,8 +158,9 @@ define <vscale x 4 x i32> @gldff1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vsca
 
 define <vscale x 2 x i64> @gldff1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1sh_d_scalar_offset:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)
@@ -157,8 +171,9 @@ define <vscale x 2 x i64> @gldff1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vsca
 ; LDFF1SW
 define <vscale x 2 x i64> @gldff1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldff1sw_d_scalar_offset:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)

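
For reference, the NOTE line added above comes from LLVM's check-line updater. A minimal sketch of regenerating the assertions for one of these tests, assuming an in-tree build (the build/bin/llc path and the single-file invocation are illustrative assumptions, not part of this commit):

  # Sketch only: assumes a built llc at build/bin/llc inside an LLVM checkout.
  llvm/utils/update_llc_test_checks.py \
      --llc-binary=build/bin/llc \
      llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll

The script runs each RUN line itself and rewrites the CHECK lines in place, which is how the regenerated checks pick up the "// %bb.0:" block label and the consistent indentation seen throughout this diff.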
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
index 42eec59497fe8..160e4914a3e45 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 
 define <vscale x 8 x half> @fadd_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fadd_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg,
                                                             <vscale x 8 x half> %a_z,
@@ -18,9 +20,10 @@ define <vscale x 8 x half> @fadd_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fadd_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fadd_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %a_z,
@@ -30,9 +33,10 @@ define <vscale x 4 x float> @fadd_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fadd_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fadd_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %a_z,
@@ -46,9 +50,10 @@ define <vscale x 2 x double> @fadd_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fmax_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmax_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
                                                             <vscale x 8 x half> %a_z,
@@ -58,9 +63,10 @@ define <vscale x 8 x half> @fmax_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmax_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmax_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %a_z,
@@ -70,9 +76,10 @@ define <vscale x 4 x float> @fmax_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fmax_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmax_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %a_z,
@@ -86,9 +93,10 @@ define <vscale x 2 x double> @fmax_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fmaxnm_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmaxnm_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x half> %a_z,
@@ -98,9 +106,10 @@ define <vscale x 8 x half> @fmaxnm_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x h
 
 define <vscale x 4 x float> @fmaxnm_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmaxnm_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x float> %a_z,
@@ -110,9 +119,10 @@ define <vscale x 4 x float> @fmaxnm_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x
 
 define <vscale x 2 x double> @fmaxnm_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmaxnm_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a_z,
@@ -126,9 +136,10 @@ define <vscale x 2 x double> @fmaxnm_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x
 
 define <vscale x 8 x half> @fmin_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmin_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
                                                             <vscale x 8 x half> %a_z,
@@ -138,9 +149,10 @@ define <vscale x 8 x half> @fmin_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmin_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmin_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %a_z,
@@ -150,9 +162,10 @@ define <vscale x 4 x float> @fmin_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fmin_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmin_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %a_z,
@@ -166,9 +179,10 @@ define <vscale x 2 x double> @fmin_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fminnm_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fminnm_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x half> %a_z,
@@ -178,9 +192,10 @@ define <vscale x 8 x half> @fminnm_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x h
 
 define <vscale x 4 x float> @fminnm_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fminnm_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x float> %a_z,
@@ -190,9 +205,10 @@ define <vscale x 4 x float> @fminnm_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x
 
 define <vscale x 2 x double> @fminnm_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fminnm_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a_z,
@@ -206,9 +222,10 @@ define <vscale x 2 x double> @fminnm_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x
 
 define <vscale x 8 x half> @fmul_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmul_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
                                                             <vscale x 8 x half> %a_z,
@@ -218,9 +235,10 @@ define <vscale x 8 x half> @fmul_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmul_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmul_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %a_z,
@@ -230,9 +248,10 @@ define <vscale x 4 x float> @fmul_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fmul_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmul_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %a_z,
@@ -246,9 +265,10 @@ define <vscale x 2 x double> @fmul_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fsub_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsub_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
                                                             <vscale x 8 x half> %a_z,
@@ -258,9 +278,10 @@ define <vscale x 8 x half> @fsub_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fsub_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsub_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %a_z,
@@ -270,9 +291,10 @@ define <vscale x 4 x float> @fsub_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fsub_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsub_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x double> %a_z,
@@ -286,9 +308,10 @@ define <vscale x 2 x double> @fsub_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fsubr_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsubr_h_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg,
                                                              <vscale x 8 x half> %a_z,
@@ -298,9 +321,10 @@ define <vscale x 8 x half> @fsubr_h_zero(<vscale x 8 x i1> %pg, <vscale x 8 x ha
 
 define <vscale x 4 x float> @fsubr_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsubr_s_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x float> %a_z,
@@ -310,9 +334,10 @@ define <vscale x 4 x float> @fsubr_s_zero(<vscale x 4 x i1> %pg, <vscale x 4 x f
 
 define <vscale x 2 x double> @fsubr_d_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsubr_d_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x double> %a_z,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
index 16c8529a87be7..a16f230cf8bbb 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 8 x half> @fabd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fabd_h:
-; CHECK: fabd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -16,8 +18,9 @@ define <vscale x 8 x half> @fabd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fabd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fabd_s:
-; CHECK: fabd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -26,8 +29,9 @@ define <vscale x 4 x float> @fabd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fabd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fabd_d:
-; CHECK: fabd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -40,8 +44,9 @@ define <vscale x 2 x double> @fabd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fabs_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fabs_h:
-; CHECK: fabs z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -50,8 +55,9 @@ define <vscale x 8 x half> @fabs_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x float> @fabs_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fabs_s:
-; CHECK: fabs z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %b)
@@ -60,8 +66,9 @@ define <vscale x 4 x float> @fabs_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x double> @fabs_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fabs_d:
-; CHECK: fabs z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %b)
@@ -74,8 +81,9 @@ define <vscale x 2 x double> @fabs_d(<vscale x 2 x double> %a, <vscale x 2 x i1>
 
 define <vscale x 8 x half> @fadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fadd_h:
-; CHECK: fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -84,8 +92,9 @@ define <vscale x 8 x half> @fadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fadd_s:
-; CHECK: fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -94,8 +103,9 @@ define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fadd_d:
-; CHECK: fadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -108,8 +118,9 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fcadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcadd_h:
-; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -119,8 +130,9 @@ define <vscale x 8 x half> @fcadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fcadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcadd_s:
-; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -130,8 +142,9 @@ define <vscale x 4 x float> @fcadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fcadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcadd_d:
-; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -145,8 +158,9 @@ define <vscale x 2 x double> @fcadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fcmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fcmla_h:
-; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -157,8 +171,9 @@ define <vscale x 8 x half> @fcmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fcmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fcmla_s:
-; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -169,8 +184,9 @@ define <vscale x 4 x float> @fcmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fcmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fcmla_d:
-; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -185,8 +201,9 @@ define <vscale x 2 x double> @fcmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fcmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fcmla_lane_h:
-; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %a,
                                                                        <vscale x 8 x half> %b,
                                                                        <vscale x 8 x half> %c,
@@ -197,8 +214,9 @@ define <vscale x 8 x half> @fcmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x h
 
 define <vscale x 4 x float> @fcmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fcmla_lane_s:
-; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                         <vscale x 4 x float> %b,
                                                                         <vscale x 4 x float> %c,
@@ -213,8 +231,9 @@ define <vscale x 4 x float> @fcmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x
 
 define <vscale x 8 x half> @fdiv_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fdiv_h:
-; CHECK: fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -223,8 +242,9 @@ define <vscale x 8 x half> @fdiv_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fdiv_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fdiv_s:
-; CHECK: fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -233,8 +253,9 @@ define <vscale x 4 x float> @fdiv_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fdiv_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fdiv_d:
-; CHECK: fdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -247,8 +268,9 @@ define <vscale x 2 x double> @fdiv_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fdivr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fdivr_h:
-; CHECK: fdivr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -257,8 +279,9 @@ define <vscale x 8 x half> @fdivr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fdivr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fdivr_s:
-; CHECK: fdivr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -267,8 +290,9 @@ define <vscale x 4 x float> @fdivr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fdivr_d:
-; CHECK: fdivr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -281,24 +305,27 @@ define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fexpa_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: fexpa_h:
-; CHECK: fexpa z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fexpa z0.h, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16> %a)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 4 x float> @fexpa_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: fexpa_s:
-; CHECK: fexpa z0.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fexpa z0.s, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32> %a)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @fexpa_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: fexpa_d:
-; CHECK: fexpa z0.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fexpa z0.d, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fexpa.x.nxv2f64(<vscale x 2 x i64> %a)
   ret <vscale x 2 x double> %out
 }
@@ -309,8 +336,9 @@ define <vscale x 2 x double> @fexpa_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 8 x half> @fmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmad_h:
-; CHECK: fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -320,8 +348,9 @@ define <vscale x 8 x half> @fmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmad_s:
-; CHECK: fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -331,8 +360,9 @@ define <vscale x 4 x float> @fmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmad_d:
-; CHECK: fmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
@@ -346,8 +376,9 @@ define <vscale x 2 x double> @fmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmax_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmax_h:
-; CHECK: fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -356,8 +387,9 @@ define <vscale x 8 x half> @fmax_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmax_s:
-; CHECK: fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -366,8 +398,9 @@ define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmax_d:
-; CHECK: fmax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -380,8 +413,9 @@ define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmaxnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmaxnm_h:
-; CHECK: fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x half> %b)
@@ -390,8 +424,9 @@ define <vscale x 8 x half> @fmaxnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmaxnm_s:
-; CHECK: fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b)
@@ -400,8 +435,9 @@ define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float
 
 define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmaxnm_d:
-; CHECK: fmaxnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x double> %b)
@@ -414,8 +450,9 @@ define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
 
 define <vscale x 8 x half> @fmin_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmin_h:
-; CHECK: fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -424,8 +461,9 @@ define <vscale x 8 x half> @fmin_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmin_s:
-; CHECK: fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -434,8 +472,9 @@ define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmin_d:
-; CHECK: fmin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -448,8 +487,9 @@ define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fminnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fminnm_h:
-; CHECK: fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x half> %b)
@@ -458,8 +498,9 @@ define <vscale x 8 x half> @fminnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fminnm_s:
-; CHECK: fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b)
@@ -468,8 +509,9 @@ define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float
 
 define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fminnm_d:
-; CHECK: fminnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x double> %b)
@@ -482,8 +524,9 @@ define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
 
 define <vscale x 8 x half> @fmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmla_h:
-; CHECK: fmla z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -493,8 +536,9 @@ define <vscale x 8 x half> @fmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmla_s:
-; CHECK: fmla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -504,8 +548,9 @@ define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmla_d:
-; CHECK: fmla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
@@ -519,8 +564,9 @@ define <vscale x 2 x double> @fmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmla_lane_h:
-; CHECK: fmla z0.h, z1.h, z2.h[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.h, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %a,
                                                                       <vscale x 8 x half> %b,
                                                                       <vscale x 8 x half> %c,
@@ -530,8 +576,9 @@ define <vscale x 8 x half> @fmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x ha
 
 define <vscale x 4 x float> @fmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmla_lane_s:
-; CHECK: fmla z0.s, z1.s, z2.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.s, z1.s, z2.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                        <vscale x 4 x float> %b,
                                                                        <vscale x 4 x float> %c,
@@ -541,8 +588,9 @@ define <vscale x 4 x float> @fmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x
 
 define <vscale x 2 x double> @fmla_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmla_lane_d:
-; CHECK: fmla z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %a,
                                                                         <vscale x 2 x double> %b,
                                                                         <vscale x 2 x double> %c,
@@ -556,8 +604,9 @@ define <vscale x 2 x double> @fmla_lane_d(<vscale x 2 x double> %a, <vscale x 2
 
 define <vscale x 8 x half> @fmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmls_h:
-; CHECK: fmls z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -567,8 +616,9 @@ define <vscale x 8 x half> @fmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmls_s:
-; CHECK: fmls z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -578,8 +628,9 @@ define <vscale x 4 x float> @fmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmls_d:
-; CHECK: fmls z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
@@ -593,8 +644,9 @@ define <vscale x 2 x double> @fmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmls_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmls_lane_h:
-; CHECK: fmls z0.h, z1.h, z2.h[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.h, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
                                                                  <vscale x 8 x half> %c,
@@ -604,8 +656,9 @@ define <vscale x 8 x half> @fmls_lane_h(<vscale x 8 x half> %a, <vscale x 8 x ha
 
 define <vscale x 4 x float> @fmls_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmls_lane_s:
-; CHECK: fmls z0.s, z1.s, z2.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.s, z1.s, z2.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
                                                                   <vscale x 4 x float> %c,
@@ -615,8 +668,9 @@ define <vscale x 4 x float> @fmls_lane_s(<vscale x 4 x float> %a, <vscale x 4 x
 
 define <vscale x 2 x double> @fmls_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmls_lane_d:
-; CHECK: fmls z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
                                                                    <vscale x 2 x double> %c,
@@ -630,8 +684,9 @@ define <vscale x 2 x double> @fmls_lane_d(<vscale x 2 x double> %a, <vscale x 2
 
 define <vscale x 8 x half> @fmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmsb_h:
-; CHECK: fmsb z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -641,8 +696,9 @@ define <vscale x 8 x half> @fmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fmsb_s:
-; CHECK: fmsb z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -652,8 +708,9 @@ define <vscale x 4 x float> @fmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fmsb_d:
-; CHECK: fmsb z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmsb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
@@ -667,8 +724,9 @@ define <vscale x 2 x double> @fmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmul_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmul_h:
-; CHECK: fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -677,8 +735,9 @@ define <vscale x 8 x half> @fmul_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmul_s:
-; CHECK: fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -687,8 +746,9 @@ define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmul_d:
-; CHECK: fmul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -701,8 +761,9 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fmul_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmul_lane_h:
-; CHECK: fmul z0.h, z0.h, z1.h[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, z0.h, z1.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> %a,
                                                                       <vscale x 8 x half> %b,
                                                                       i32 3)
@@ -711,8 +772,9 @@ define <vscale x 8 x half> @fmul_lane_h(<vscale x 8 x half> %a, <vscale x 8 x ha
 
 define <vscale x 4 x float> @fmul_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmul_lane_s:
-; CHECK: fmul z0.s, z0.s, z1.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, z0.s, z1.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                        <vscale x 4 x float> %b,
                                                                        i32 2)
@@ -721,8 +783,9 @@ define <vscale x 4 x float> @fmul_lane_s(<vscale x 4 x float> %a, <vscale x 4 x
 
 define <vscale x 2 x double> @fmul_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmul_lane_d:
-; CHECK: fmul z0.d, z0.d, z1.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, z0.d, z1.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> %a,
                                                                         <vscale x 2 x double> %b,
                                                                         i32 1)
@@ -735,8 +798,9 @@ define <vscale x 2 x double> @fmul_lane_d(<vscale x 2 x double> %a, <vscale x 2
 
 define <vscale x 8 x half> @fmulx_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmulx_h:
-; CHECK: fmulx z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmulx z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -745,8 +809,9 @@ define <vscale x 8 x half> @fmulx_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fmulx_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmulx_s:
-; CHECK: fmulx z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmulx z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -755,8 +820,9 @@ define <vscale x 4 x float> @fmulx_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fmulx_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmulx_d:
-; CHECK: fmulx z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmulx z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -769,8 +835,9 @@ define <vscale x 2 x double> @fmulx_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fneg_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fneg_h:
-; CHECK: fneg z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fneg z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -779,8 +846,9 @@ define <vscale x 8 x half> @fneg_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x float> @fneg_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fneg_s:
-; CHECK: fneg z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fneg z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %b)
@@ -789,8 +857,9 @@ define <vscale x 4 x float> @fneg_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x double> @fneg_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fneg_d:
-; CHECK: fneg z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fneg z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %b)
@@ -803,8 +872,9 @@ define <vscale x 2 x double> @fneg_d(<vscale x 2 x double> %a, <vscale x 2 x i1>
 
 define <vscale x 8 x half> @fnmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fnmad_h:
-; CHECK: fnmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -814,8 +884,9 @@ define <vscale x 8 x half> @fnmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fnmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fnmad_s:
-; CHECK: fnmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -825,8 +896,9 @@ define <vscale x 4 x float> @fnmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fnmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fnmad_d:
-; CHECK: fnmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -840,8 +912,9 @@ define <vscale x 2 x double> @fnmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fnmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fnmla_h:
-; CHECK: fnmla z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -851,8 +924,9 @@ define <vscale x 8 x half> @fnmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fnmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fnmla_s:
-; CHECK: fnmla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -862,8 +936,9 @@ define <vscale x 4 x float> @fnmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fnmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fnmla_d:
-; CHECK: fnmla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -877,8 +952,9 @@ define <vscale x 2 x double> @fnmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fnmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fnmls_h:
-; CHECK: fnmls z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -888,8 +964,9 @@ define <vscale x 8 x half> @fnmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fnmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fnmls_s:
-; CHECK: fnmls z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -899,8 +976,9 @@ define <vscale x 4 x float> @fnmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fnmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fnmls_d:
-; CHECK: fnmls z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -914,8 +992,9 @@ define <vscale x 2 x double> @fnmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fnmsb_h:
-; CHECK: fnmsb z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b,
@@ -925,8 +1004,9 @@ define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
 ; CHECK-LABEL: fnmsb_s:
-; CHECK: fnmsb z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
@@ -936,8 +1016,9 @@ define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
 ; CHECK-LABEL: fnmsb_d:
-; CHECK: fnmsb z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fnmsb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b,
@@ -951,24 +1032,27 @@ define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @frecpe_h(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: frecpe_h:
-; CHECK: frecpe z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpe z0.h, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frecpe.x.nxv8f16(<vscale x 8 x half> %a)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 4 x float> @frecpe_s(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: frecpe_s:
-; CHECK: frecpe z0.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpe z0.s, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frecpe.x.nxv4f32(<vscale x 4 x float> %a)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @frecpe_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: frecpe_d:
-; CHECK: frecpe z0.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpe z0.d, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frecpe.x.nxv2f64(<vscale x 2 x double> %a)
   ret <vscale x 2 x double> %out
 }
@@ -979,8 +1063,9 @@ define <vscale x 2 x double> @frecpe_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
 
 define <vscale x 8 x half> @frecpx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frecpx_h:
-; CHECK: frecpx z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpx z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %b)
@@ -989,8 +1074,9 @@ define <vscale x 8 x half> @frecpx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frecpx_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frecpx_s:
-; CHECK: frecpx z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpx z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %b)
@@ -999,8 +1085,9 @@ define <vscale x 4 x float> @frecpx_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frecpx_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frecpx_d:
-; CHECK: frecpx z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frecpx z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %b)
@@ -1013,8 +1100,9 @@ define <vscale x 2 x double> @frecpx_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frinta_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frinta_h:
-; CHECK: frinta z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1023,8 +1111,9 @@ define <vscale x 8 x half> @frinta_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frinta_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frinta_s:
-; CHECK: frinta z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1033,8 +1122,9 @@ define <vscale x 4 x float> @frinta_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frinta_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frinta_d:
-; CHECK: frinta z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1047,8 +1137,9 @@ define <vscale x 2 x double> @frinta_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frinti_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frinti_h:
-; CHECK: frinti z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinti z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1057,8 +1148,9 @@ define <vscale x 8 x half> @frinti_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frinti_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frinti_s:
-; CHECK: frinti z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinti z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1067,8 +1159,9 @@ define <vscale x 4 x float> @frinti_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frinti_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frinti_d:
-; CHECK: frinti z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinti z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1081,8 +1174,9 @@ define <vscale x 2 x double> @frinti_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frintm_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frintm_h:
-; CHECK: frintm z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1091,8 +1185,9 @@ define <vscale x 8 x half> @frintm_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frintm_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frintm_s:
-; CHECK: frintm z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1101,8 +1196,9 @@ define <vscale x 4 x float> @frintm_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frintm_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frintm_d:
-; CHECK: frintm z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1115,8 +1211,9 @@ define <vscale x 2 x double> @frintm_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frintn_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frintn_h:
-; CHECK: frintn z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1125,8 +1222,9 @@ define <vscale x 8 x half> @frintn_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frintn_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frintn_s:
-; CHECK: frintn z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1135,8 +1233,9 @@ define <vscale x 4 x float> @frintn_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frintn_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frintn_d:
-; CHECK: frintn z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1149,8 +1248,9 @@ define <vscale x 2 x double> @frintn_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frintp_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frintp_h:
-; CHECK: frintp z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1159,8 +1259,9 @@ define <vscale x 8 x half> @frintp_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frintp_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frintp_s:
-; CHECK: frintp z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1169,8 +1270,9 @@ define <vscale x 4 x float> @frintp_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frintp_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frintp_d:
-; CHECK: frintp z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1183,8 +1285,9 @@ define <vscale x 2 x double> @frintp_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frintx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frintx_h:
-; CHECK: frintx z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1193,8 +1296,9 @@ define <vscale x 8 x half> @frintx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frintx_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frintx_s:
-; CHECK: frintx z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1203,8 +1307,9 @@ define <vscale x 4 x float> @frintx_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frintx_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frintx_d:
-; CHECK: frintx z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1217,8 +1322,9 @@ define <vscale x 2 x double> @frintx_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frintz_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frintz_h:
-; CHECK: frintz z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -1227,8 +1333,9 @@ define <vscale x 8 x half> @frintz_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x float> @frintz_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: frintz_s:
-; CHECK: frintz z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -1237,8 +1344,9 @@ define <vscale x 4 x float> @frintz_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @frintz_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: frintz_d:
-; CHECK: frintz z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double> %a,
                                                                      <vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %b)
@@ -1251,24 +1359,27 @@ define <vscale x 2 x double> @frintz_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @frsqrte_h(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: frsqrte_h:
-; CHECK: frsqrte z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frsqrte z0.h, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half> %a)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 4 x float> @frsqrte_s(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: frsqrte_s:
-; CHECK: frsqrte z0.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frsqrte z0.s, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrte.x.nxv4f32(<vscale x 4 x float> %a)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @frsqrte_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: frsqrte_d:
-; CHECK: frsqrte z0.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frsqrte z0.d, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrte.x.nxv2f64(<vscale x 2 x double> %a)
   ret <vscale x 2 x double> %out
 }
@@ -1279,8 +1390,9 @@ define <vscale x 2 x double> @frsqrte_d(<vscale x 2 x i1> %pg, <vscale x 2 x dou
 
 define <vscale x 8 x half> @fscale_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: fscale_h:
-; CHECK: fscale z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fscale z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x i16> %b)
@@ -1289,8 +1401,9 @@ define <vscale x 8 x half> @fscale_h(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @fscale_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: fscale_s:
-; CHECK: fscale z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x i32> %b)
@@ -1299,8 +1412,9 @@ define <vscale x 4 x float> @fscale_s(<vscale x 4 x i1> %pg, <vscale x 4 x float
 
 define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: fscale_d:
-; CHECK: fscale z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -1313,8 +1427,9 @@ define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
 
 define <vscale x 8 x half> @fsqrt_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsqrt_h:
-; CHECK: fsqrt z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %b)
@@ -1323,8 +1438,9 @@ define <vscale x 8 x half> @fsqrt_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %p
 
 define <vscale x 4 x float> @fsqrt_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsqrt_s:
-; CHECK: fsqrt z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %b)
@@ -1333,8 +1449,9 @@ define <vscale x 4 x float> @fsqrt_s(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @fsqrt_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsqrt_d:
-; CHECK: fsqrt z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %b)
@@ -1347,8 +1464,9 @@ define <vscale x 2 x double> @fsqrt_d(<vscale x 2 x double> %a, <vscale x 2 x i1
 
 define <vscale x 8 x half> @fsub_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsub_h:
-; CHECK: fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
@@ -1357,8 +1475,9 @@ define <vscale x 8 x half> @fsub_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a
 
 define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsub_s:
-; CHECK: fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
@@ -1367,8 +1486,9 @@ define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsub_d:
-; CHECK: fsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
@@ -1381,8 +1501,9 @@ define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
 
 define <vscale x 8 x half> @fsubr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsubr_h:
-; CHECK: fsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -1391,8 +1512,9 @@ define <vscale x 8 x half> @fsubr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsubr_s:
-; CHECK: fsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -1401,8 +1523,9 @@ define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsubr_d:
-; CHECK: fsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -1415,8 +1538,9 @@ define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 
 define <vscale x 8 x half> @ftmad_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: ftmad_h:
-; CHECK: ftmad z0.h, z0.h, z1.h, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftmad z0.h, z0.h, z1.h, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half> %a,
                                                                     <vscale x 8 x half> %b,
                                                                     i32 0)
@@ -1425,8 +1549,9 @@ define <vscale x 8 x half> @ftmad_h(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 4 x float> @ftmad_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: ftmad_s:
-; CHECK: ftmad z0.s, z0.s, z1.s, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftmad z0.s, z0.s, z1.s, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float> %a,
                                                                      <vscale x 4 x float> %b,
                                                                      i32 0)
@@ -1435,8 +1560,9 @@ define <vscale x 4 x float> @ftmad_s(<vscale x 4 x float> %a, <vscale x 4 x floa
 
 define <vscale x 2 x double> @ftmad_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: ftmad_d:
-; CHECK: ftmad z0.d, z0.d, z1.d, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftmad z0.d, z0.d, z1.d, #7
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double> %a,
                                                                       <vscale x 2 x double> %b,
                                                                       i32 7)
@@ -1449,8 +1575,9 @@ define <vscale x 2 x double> @ftmad_d(<vscale x 2 x double> %a, <vscale x 2 x do
 
 define <vscale x 8 x half> @ftsmul_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ftsmul_h:
-; CHECK: ftsmul z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftsmul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half> %a,
                                                                      <vscale x 8 x i16> %b)
   ret <vscale x 8 x half> %out
@@ -1458,8 +1585,9 @@ define <vscale x 8 x half> @ftsmul_h(<vscale x 8 x half> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x float> @ftsmul_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ftsmul_s:
-; CHECK: ftsmul z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftsmul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float> %a,
                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x float> %out
@@ -1467,8 +1595,9 @@ define <vscale x 4 x float> @ftsmul_s(<vscale x 4 x float> %a, <vscale x 4 x i32
 
 define <vscale x 2 x double> @ftsmul_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ftsmul_d:
-; CHECK: ftsmul z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftsmul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double> %a,
                                                                        <vscale x 2 x i64> %b)
   ret <vscale x 2 x double> %out
@@ -1480,8 +1609,9 @@ define <vscale x 2 x double> @ftsmul_d(<vscale x 2 x double> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @ftssel_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ftssel_h:
-; CHECK: ftssel z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftssel z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half> %a,
                                                                      <vscale x 8 x i16> %b)
   ret <vscale x 8 x half> %out
@@ -1489,8 +1619,9 @@ define <vscale x 8 x half> @ftssel_h(<vscale x 8 x half> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x float> @ftssel_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ftssel_s:
-; CHECK: ftssel z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftssel z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float> %a,
                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x float> %out
@@ -1498,8 +1629,9 @@ define <vscale x 4 x float> @ftssel_s(<vscale x 4 x float> %a, <vscale x 4 x i32
 
 define <vscale x 2 x double> @ftssel_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ftssel_d:
-; CHECK: ftssel z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ftssel z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double> %a,
                                                                        <vscale x 2 x i64> %b)
   ret <vscale x 2 x double> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
index 656b8a47ec159..ec3f58329f1f4 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x half> @fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvt_f16_f32:
-; CHECK: fcvt z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %b)
@@ -17,8 +19,9 @@ define <vscale x 8 x half> @fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x i
 
 define <vscale x 8 x half> @fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvt_f16_f64:
-; CHECK: fcvt z0.h, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %b)
@@ -27,8 +30,9 @@ define <vscale x 8 x half> @fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x i
 
 define <vscale x 4 x float> @fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvt_f32_f16:
-; CHECK: fcvt z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -37,8 +41,9 @@ define <vscale x 4 x float> @fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 4 x
 
 define <vscale x 4 x float> @fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvt_f32_f64:
-; CHECK: fcvt z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x double> %b)
@@ -47,8 +52,9 @@ define <vscale x 4 x float> @fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x
 
 define <vscale x 2 x double> @fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvt_f64_f16:
-; CHECK: fcvt z0.d, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   <vscale x 8 x half> %b)
@@ -57,8 +63,9 @@ define <vscale x 2 x double> @fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 2
 
 define <vscale x 2 x double> @fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvt_f64_f32:
-; CHECK: fcvt z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   <vscale x 4 x float> %b)
@@ -71,8 +78,9 @@ define <vscale x 2 x double> @fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 2
 
 define <vscale x 8 x i16> @fcvtzs_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzs_i16_f16:
-; CHECK: fcvtzs z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %a,
                                                                           <vscale x 8 x i1> %pg,
                                                                           <vscale x 8 x half> %b)
@@ -81,8 +89,9 @@ define <vscale x 8 x i16> @fcvtzs_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i
 
 define <vscale x 4 x i32> @fcvtzs_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtzs_i32_f32:
-; CHECK: fcvtzs z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> %a,
                                                                           <vscale x 4 x i1> %pg,
                                                                           <vscale x 4 x float> %b)
@@ -91,8 +100,9 @@ define <vscale x 4 x i32> @fcvtzs_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i
 
 define <vscale x 2 x i64> @fcvtzs_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtzs_i64_f64:
-; CHECK: fcvtzs z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> %a,
                                                                           <vscale x 2 x i1> %pg,
                                                                           <vscale x 2 x double> %b)
@@ -101,8 +111,9 @@ define <vscale x 2 x i64> @fcvtzs_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 4 x i32> @fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzs_i32_f16:
-; CHECK: fcvtzs z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -111,8 +122,9 @@ define <vscale x 4 x i32> @fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 4 x i
 
 define <vscale x 4 x i32> @fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtzs_i32_f64:
-; CHECK: fcvtzs z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x double> %b)
@@ -121,8 +133,9 @@ define <vscale x 4 x i32> @fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x i
 
 define <vscale x 2 x i64> @fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzs_i64_f16:
-; CHECK: fcvtzs z0.d, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -131,8 +144,9 @@ define <vscale x 2 x i64> @fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 2 x i64> @fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtzs_i64_f32:
-; CHECK: fcvtzs z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 4 x float> %b)
@@ -145,8 +159,9 @@ define <vscale x 2 x i64> @fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 8 x i16> @fcvtzu_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzu_i16_f16:
-; CHECK: fcvtzu z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %a,
                                                                           <vscale x 8 x i1> %pg,
                                                                           <vscale x 8 x half> %b)
@@ -155,8 +170,9 @@ define <vscale x 8 x i16> @fcvtzu_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i
 
 define <vscale x 4 x i32> @fcvtzu_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtzu_i32_f32:
-; CHECK: fcvtzu z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> %a,
                                                                           <vscale x 4 x i1> %pg,
                                                                           <vscale x 4 x float> %b)
@@ -165,8 +181,9 @@ define <vscale x 4 x i32> @fcvtzu_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i
 
 define <vscale x 2 x i64> @fcvtzu_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtzu_i64_f64:
-; CHECK: fcvtzu z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> %a,
                                                                           <vscale x 2 x i1> %pg,
                                                                           <vscale x 2 x double> %b)
@@ -175,8 +192,9 @@ define <vscale x 2 x i64> @fcvtzu_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 4 x i32> @fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzu_i32_f16:
-; CHECK: fcvtzu z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -185,8 +203,9 @@ define <vscale x 4 x i32> @fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 4 x i
 
 define <vscale x 4 x i32> @fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtzu_i32_f64:
-; CHECK: fcvtzu z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x double> %b)
@@ -195,8 +214,9 @@ define <vscale x 4 x i32> @fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x i
 
 define <vscale x 2 x i64> @fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtzu_i64_f16:
-; CHECK: fcvtzu z0.d, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -205,8 +225,9 @@ define <vscale x 2 x i64> @fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 2 x i64> @fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtzu_i64_f32:
-; CHECK: fcvtzu z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 4 x float> %b)
@@ -219,8 +240,9 @@ define <vscale x 2 x i64> @fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 8 x half> @scvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: scvtf_f16_i16:
-; CHECK: scvtf z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a,
                                                                           <vscale x 8 x i1> %pg,
                                                                           <vscale x 8 x i16> %b)
@@ -229,8 +251,9 @@ define <vscale x 8 x half> @scvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @scvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: scvtf_f32_i32:
-; CHECK: scvtf z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %a,
                                                                            <vscale x 4 x i1> %pg,
                                                                            <vscale x 4 x i32> %b)
@@ -239,8 +262,9 @@ define <vscale x 4 x float> @scvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4
 
 define <vscale x 2 x double> @scvtf_f64_i64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: scvtf_f64_i64:
-; CHECK: scvtf z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %a,
                                                                             <vscale x 2 x i1> %pg,
                                                                             <vscale x 2 x i64> %b)
@@ -249,8 +273,9 @@ define <vscale x 2 x double> @scvtf_f64_i64(<vscale x 2 x double> %a, <vscale x
 
 define <vscale x 8 x half> @scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: scvtf_f16_i32:
-; CHECK: scvtf z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
@@ -259,8 +284,9 @@ define <vscale x 8 x half> @scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x
 
 define <vscale x 8 x half> @scvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: scvtf_f16_i64:
-; CHECK: scvtf z0.h, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -269,8 +295,9 @@ define <vscale x 8 x half> @scvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: scvtf_f32_i64:
-; CHECK: scvtf z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %b)
@@ -279,8 +306,9 @@ define <vscale x 4 x float> @scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2
 
 define <vscale x 2 x double> @scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: scvtf_f64_i32:
-; CHECK: scvtf z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x i1> %pg,
                                                                    <vscale x 4 x i32> %b)
@@ -293,8 +321,9 @@ define <vscale x 2 x double> @scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x
 
 define <vscale x 8 x half> @ucvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ucvtf_f16_i16:
-; CHECK: ucvtf z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a,
                                                                           <vscale x 8 x i1> %pg,
                                                                           <vscale x 8 x i16> %b)
@@ -303,8 +332,9 @@ define <vscale x 8 x half> @ucvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @ucvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ucvtf_f32_i32:
-; CHECK: ucvtf z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %a,
                                                                            <vscale x 4 x i1> %pg,
                                                                            <vscale x 4 x i32> %b)
@@ -313,8 +343,9 @@ define <vscale x 4 x float> @ucvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4
 
 define <vscale x 2 x double> @ucvtf_f64_i64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ucvtf_f64_i64:
-; CHECK: ucvtf z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %a,
                                                                             <vscale x 2 x i1> %pg,
                                                                             <vscale x 2 x i64> %b)
@@ -323,8 +354,9 @@ define <vscale x 2 x double> @ucvtf_f64_i64(<vscale x 2 x double> %a, <vscale x
 
 define <vscale x 8 x half> @ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ucvtf_f16_i32:
-; CHECK: ucvtf z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
@@ -333,8 +365,9 @@ define <vscale x 8 x half> @ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x
 
 define <vscale x 8 x half> @ucvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ucvtf_f16_i64:
-; CHECK: ucvtf z0.h, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -343,8 +376,9 @@ define <vscale x 8 x half> @ucvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ucvtf_f32_i64:
-; CHECK: ucvtf z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %b)
@@ -353,8 +387,9 @@ define <vscale x 4 x float> @ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2
 
 define <vscale x 2 x double> @ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ucvtf_f64_i32:
-; CHECK: ucvtf z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x i1> %pg,
                                                                    <vscale x 4 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
index c933c2eab40df..95de90078d341 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
 ; FADDA
@@ -6,8 +7,11 @@
 
 define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fadda_f16:
-; CHECK: fadda h0, p0, h0, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %pg,
                                                    half %init,
                                                    <vscale x 8 x half> %a)
@@ -16,8 +20,11 @@ define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a
 
 define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fadda_f32:
-; CHECK: fadda s0, p0, s0, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %pg,
                                                     float %init,
                                                     <vscale x 4 x float> %a)
@@ -26,8 +33,11 @@ define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float>
 
 define double @fadda_f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fadda_f64:
-; CHECK: fadda d0, p0, d0, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg,
                                                      double %init,
                                                      <vscale x 2 x double> %a)
@@ -40,8 +50,10 @@ define double @fadda_f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x doub
 
 define half @faddv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: faddv_f16:
-; CHECK: faddv h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
   ret half %res
@@ -49,8 +61,10 @@ define half @faddv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 
 define float @faddv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: faddv_f32:
-; CHECK: faddv s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
   ret float %res
@@ -58,8 +72,10 @@ define float @faddv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 
 define double @faddv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: faddv_f64:
-; CHECK: faddv d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
   ret double %res
@@ -71,8 +87,10 @@ define double @faddv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 
 define half @fmaxnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxnmv_f16:
-; CHECK: fmaxnmv h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
   ret half %res
@@ -80,8 +98,10 @@ define half @fmaxnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 
 define float @fmaxnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxnmv_f32:
-; CHECK: fmaxnmv s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
   ret float %res
@@ -89,8 +109,10 @@ define float @fmaxnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 
 define double @fmaxnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxnmv_f64:
-; CHECK: fmaxnmv d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
   ret double %res
@@ -102,8 +124,10 @@ define double @fmaxnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 
 define half @fmaxv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxv_f16:
-; CHECK: fmaxv h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
   ret half %res
@@ -111,8 +135,10 @@ define half @fmaxv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 
 define float @fmaxv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxv_f32:
-; CHECK: fmaxv s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
   ret float %res
@@ -120,8 +146,10 @@ define float @fmaxv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 
 define double @fmaxv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxv_f64:
-; CHECK: fmaxv d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
   ret double %res
@@ -133,8 +161,10 @@ define double @fmaxv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 
 define half @fminnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminnmv_f16:
-; CHECK: fminnmv h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
   ret half %res
@@ -142,8 +172,10 @@ define half @fminnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 
 define float @fminnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminnmv_f32:
-; CHECK: fminnmv s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
   ret float %res
@@ -151,8 +183,10 @@ define float @fminnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 
 define double @fminnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminnmv_f64:
-; CHECK: fminnmv d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
   ret double %res
@@ -164,8 +198,10 @@ define double @fminnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 
 define half @fminv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminv_f16:
-; CHECK: fminv h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
   ret half %res
@@ -173,8 +209,10 @@ define half @fminv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 
 define float @fminv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminv_f32:
-; CHECK: fminv s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
   ret float %res
@@ -182,8 +220,10 @@ define float @fminv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 
 define double @fminv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminv_f64:
-; CHECK: fminv d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
   ret double %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
index db593413f7af6..33b94b553ff9e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; LD1H
 define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1h_s_uxtw_index:
-; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -20,8 +22,9 @@ define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base,
 
 define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1h_s_sxtw_index:
-; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -31,8 +34,9 @@ define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base,
 
 define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_uxtw_index:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -42,8 +46,9 @@ define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base,
 
 define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_sxtw_index:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base,
 ; LD1W
 define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_uxtw_index:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base,
 
 define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_sxtw_index:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -74,8 +81,9 @@ define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base,
 
 define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1w_d_uxtw_index:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -85,8 +93,9 @@ define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base,
 
 define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1w_d_sxtw_index:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base,
 
 define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_uxtw_index_float:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                     float* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -106,8 +116,9 @@ define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, flo
 
 define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_sxtw_index_float:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                     float* %base,
                                                                                     <vscale x 4 x i32> %b)
@@ -117,8 +128,9 @@ define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, flo
 ; LD1D
 define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_s_uxtw_index:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                   i64* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -127,8 +139,9 @@ define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base,
 
 define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_sxtw_index:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                   i64* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -137,8 +150,9 @@ define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <
 
 define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_uxtw_index_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                      double* %base,
                                                                                      <vscale x 2 x i32> %b)
@@ -147,8 +161,9 @@ define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, dou
 
 define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_sxtw_index_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                      double* %base,
                                                                                      <vscale x 2 x i32> %b)
@@ -164,8 +179,9 @@ define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, dou
 ; LD1SH
 define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sh_s_uxtw_index:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -175,8 +191,9 @@ define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base
 
 define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sh_s_sxtw_index:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 4 x i32> %b)
@@ -186,8 +203,9 @@ define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base
 
 define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sh_d_uxtw_index:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -197,8 +215,9 @@ define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base
 
 define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sh_d_sxtw_index:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                   i16* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -209,8 +228,9 @@ define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base
 ; LD1SW
 define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sw_d_uxtw_index:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 2 x i32> %b)
@@ -220,8 +240,9 @@ define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base
 
 define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sw_d_sxtw_index:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                   i32* %base,
                                                                                   <vscale x 2 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll
index ba8806986d690..5ad8fa807880e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; LD1B
 define <vscale x 4 x i32> @gld1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1b_s_uxtw:
-; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
@@ -20,8 +22,9 @@ define <vscale x 4 x i32> @gld1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscal
 
 define <vscale x 4 x i32> @gld1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1b_s_sxtw:
-; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
@@ -31,8 +34,9 @@ define <vscale x 4 x i32> @gld1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscal
 
 define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1b_d_uxtw:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
@@ -42,8 +46,9 @@ define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscal
 
 define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1b_d_sxtw:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscal
 ; LD1H
 define <vscale x 4 x i32> @gld1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1h_s_uxtw:
-; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -65,8 +71,9 @@ define <vscale x 4 x i32> @gld1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vsca
 
 define <vscale x 4 x i32> @gld1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1h_s_sxtw:
-; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -76,8 +83,9 @@ define <vscale x 4 x i32> @gld1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vsca
 
 define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_uxtw:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -87,8 +95,9 @@ define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vsca
 
 define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_sxtw:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -99,8 +108,9 @@ define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vsca
 ; LD1W
 define <vscale x 4 x i32> @gld1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_uxtw:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -109,8 +119,9 @@ define <vscale x 4 x i32> @gld1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 4 x i32> @gld1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_sxtw:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -119,8 +130,9 @@ define <vscale x 4 x i32> @gld1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1w_d_uxtw:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -130,8 +142,9 @@ define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1w_d_sxtw:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -141,8 +154,9 @@ define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 4 x float> @gld1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_uxtw_float:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                               float* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -151,8 +165,9 @@ define <vscale x 4 x float> @gld1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %b
 
 define <vscale x 4 x float> @gld1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1w_s_sxtw_float:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                               float* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -162,8 +177,9 @@ define <vscale x 4 x float> @gld1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %b
 ; LD1D
 define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_d_uxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -172,8 +188,9 @@ define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vsca
 
 define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_d_sxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -182,8 +199,9 @@ define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vsca
 
 define <vscale x 2 x double> @gld1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_d_uxtw_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i32> %b)
@@ -192,8 +210,9 @@ define <vscale x 2 x double> @gld1d_d_uxtw_double(<vscale x 2 x i1> %pg, double*
 
 define <vscale x 2 x double> @gld1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1d_d_sxtw_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i32> %b)
@@ -209,8 +228,9 @@ define <vscale x 2 x double> @gld1d_d_sxtw_double(<vscale x 2 x i1> %pg, double*
 ; LD1SB
 define <vscale x 4 x i32> @gld1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sb_s_uxtw:
-; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
@@ -220,8 +240,9 @@ define <vscale x 4 x i32> @gld1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vsca
 
 define <vscale x 4 x i32> @gld1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sb_s_sxtw:
-; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
@@ -231,8 +252,9 @@ define <vscale x 4 x i32> @gld1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vsca
 
 define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sb_d_uxtw:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
@@ -242,8 +264,9 @@ define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vsca
 
 define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sb_d_sxtw:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
@@ -254,8 +277,9 @@ define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vsca
 ; LD1SH
 define <vscale x 4 x i32> @gld1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sh_s_uxtw:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -265,8 +289,9 @@ define <vscale x 4 x i32> @gld1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vsc
 
 define <vscale x 4 x i32> @gld1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gld1sh_s_sxtw:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -276,8 +301,9 @@ define <vscale x 4 x i32> @gld1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vsc
 
 define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sh_d_uxtw:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -287,8 +313,9 @@ define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vsc
 
 define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sh_d_sxtw:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -299,8 +326,9 @@ define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vsc
 ; LD1SW
 define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sw_d_uxtw:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
@@ -310,8 +338,9 @@ define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vsc
 
 define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1sw_d_sxtw:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll
index 158ec8c4ce131..35747ed79437a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 ;
 
 define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1h_index
-; CHECK:	    ld1h	{ z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1h_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 2 x i64> %b)
@@ -17,9 +19,10 @@ define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscal
 }
 
 define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1w_index
-; CHECK:	    ld1w	{ z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1w_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                              i32* %base,
                                                                              <vscale x 2 x i64> %b)
@@ -28,9 +31,10 @@ define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscal
 }
 
 define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                              i64* %base,
                                                                              <vscale x 2 x i64> %b)
@@ -38,9 +42,10 @@ define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscal
 }
 
 define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index_double
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                 double* %base,
                                                                                 <vscale x 2 x i64> %b)
@@ -53,9 +58,10 @@ define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, double*
 ;
 
 define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sh_index
-; CHECK:	    ld1sh	{ z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sh_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                              i16* %base,
                                                                              <vscale x 2 x i64> %b)
@@ -64,9 +70,10 @@ define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vsca
 }
 
 define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sw_index
-; CHECK:	    ld1sw	{ z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sw_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                              i32* %base,
                                                                              <vscale x 2 x i64> %b)
@@ -80,9 +87,10 @@ define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vsca
 ;
 
 define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1h_index_sxtw
-; CHECK:	    ld1h	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1h_index_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -94,9 +102,10 @@ define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <
 }
 
 define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1w_index_sxtw
-; CHECK:	    ld1w	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1w_index_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -108,9 +117,10 @@ define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <
 }
 
 define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index_sxtw
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -121,9 +131,10 @@ define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, i64* %base, <
 }
 
 define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index_double_sxtw
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index_double_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -139,9 +150,10 @@ define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, dou
 ;
 
 define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sh_index_sxtw
-; CHECK:	    ld1sh	{ z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sh_index_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -153,9 +165,10 @@ define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, i16* %base,
 }
 
 define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sw_index_sxtw
-; CHECK:	    ld1sw	{ z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sw_index_sxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -172,9 +185,10 @@ define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, i32* %base,
 ;
 
 define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1h_index_uxtw
-; CHECK:	    ld1h	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1h_index_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -186,9 +200,10 @@ define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <
 }
 
 define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1w_index_uxtw
-; CHECK:	    ld1w	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1w_index_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -200,9 +215,10 @@ define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <
 }
 
 define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index_uxtw
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -213,9 +229,10 @@ define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, i64* %base, <
 }
 
 define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1d_index_double_uxtw
-; CHECK:	    ld1d	{ z0.d }, p0/z, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1d_index_double_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -231,9 +248,10 @@ define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, dou
 ;
 
 define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sh_index_uxtw
-; CHECK:	    ld1sh	{ z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sh_index_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -245,9 +263,10 @@ define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, i16* %base,
 }
 
 define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gld1sw_index_uxtw
-; CHECK:	    ld1sw	{ z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: gld1sw_index_uxtw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll
index dd61c527d3b24..1dc15e0f1c844 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -7,8 +8,9 @@
 
 define <vscale x 2 x i64> @gld1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1b_d:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                      i8* %base,
                                                                      <vscale x 2 x i64> %b)
@@ -18,8 +20,9 @@ define <vscale x 2 x i64> @gld1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2
 
 define <vscale x 2 x i64> @gld1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1h_d:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                        i16* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -29,8 +32,9 @@ define <vscale x 2 x i64> @gld1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x
 
 define <vscale x 2 x i64> @gld1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1w_d:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                        i32* %base,
                                                                        <vscale x 2 x i64> %offsets)
@@ -40,8 +44,9 @@ define <vscale x 2 x i64> @gld1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x
 
 define <vscale x 2 x i64> @gld1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %pg,
                                                                        i64* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -50,8 +55,9 @@ define <vscale x 2 x i64> @gld1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x
 
 define <vscale x 2 x double> @gld1d_d_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %pg,
                                                                        double* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -65,8 +71,9 @@ define <vscale x 2 x double> @gld1d_d_double(<vscale x 2 x i1> %pg, double* %bas
 
 define <vscale x 2 x i64> @gld1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sb_d:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                      i8* %base,
                                                                      <vscale x 2 x i64> %b)
@@ -76,8 +83,9 @@ define <vscale x 2 x i64> @gld1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x
 
 define <vscale x 2 x i64> @gld1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sh_d:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                        i16* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -87,8 +95,9 @@ define <vscale x 2 x i64> @gld1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x
 
 define <vscale x 2 x i64> @gld1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1sw_d:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                        i32* %base,
                                                                        <vscale x 2 x i64> %offsets)
@@ -103,8 +112,9 @@ define <vscale x 2 x i64> @gld1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x
 
 define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1b_d_sxtw:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -117,8 +127,9 @@ define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscal
 
 define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1h_d_sxtw:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -131,8 +142,9 @@ define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vsca
 
 define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1w_d_sxtw:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %offsets)
@@ -145,8 +157,9 @@ define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d_sxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -158,8 +171,9 @@ define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vsca
 
 define <vscale x 2 x double> @gld1d_d_double_sxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d_double_sxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -176,8 +190,9 @@ define <vscale x 2 x double> @gld1d_d_double_sxtw(<vscale x 2 x i1> %pg, double*
 
 define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sb_d_sxtw:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -190,8 +205,9 @@ define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vsca
 
 define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sh_d_sxtw:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -204,8 +220,9 @@ define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vsc
 
 define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1sw_d_sxtw:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %offsets)
@@ -223,8 +240,9 @@ define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vsc
 
 define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1b_d_uxtw:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -237,8 +255,9 @@ define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscal
 
 define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1h_d_uxtw:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -251,8 +270,9 @@ define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vsca
 
 define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1w_d_uxtw:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %offsets)
@@ -265,8 +285,9 @@ define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vsca
 
 define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d_uxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -278,8 +299,9 @@ define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vsca
 
 define <vscale x 2 x double> @gld1d_d_double_uxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1d_d_double_uxtw:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -296,8 +318,9 @@ define <vscale x 2 x double> @gld1d_d_double_uxtw(<vscale x 2 x i1> %pg, double*
 
 define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sb_d_uxtw:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -310,8 +333,9 @@ define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vsca
 
 define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gld1sh_d_uxtw:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -324,8 +348,9 @@ define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vsc
 
 define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gld1sw_d_uxtw:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %offsets)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
index c7798e7f52d25..a8b10033d0f15 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; LD1B
 define <vscale x 4 x i32> @gld1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1b_s_imm_offset:
-; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 16)
@@ -19,8 +21,9 @@ define <vscale x 4 x i32> @gld1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @gld1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1b_d_imm_offset:
-; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 16)
@@ -31,8 +34,9 @@ define <vscale x 2 x i64> @gld1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2
 ; LD1H
 define <vscale x 4 x i32> @gld1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1h_s_imm_offset:
-; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 16)
@@ -42,8 +46,9 @@ define <vscale x 4 x i32> @gld1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @gld1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1h_d_imm_offset:
-; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gld1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2
 ; LD1W
 define <vscale x 4 x i32> @gld1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1w_s_imm_offset:
-; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 16)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gld1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @gld1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1w_d_imm_offset:
-; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -75,8 +82,9 @@ define <vscale x 2 x i64> @gld1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 4 x float> @gld1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1w_s_imm_offset_float:
-; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 16)
@@ -86,8 +94,9 @@ define <vscale x 4 x float> @gld1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vs
 ; LD1D
 define <vscale x 2 x i64> @gld1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1d_d_imm_offset:
-; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gld1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 2 x double> @gld1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1d_d_imm_offset_double:
-; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                 <vscale x 2 x i64> %base,
                                                                                                 i64 16)
@@ -112,8 +122,9 @@ define <vscale x 2 x double> @gld1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <
 ; LD1SB
 define <vscale x 4 x i32> @gld1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1sb_s_imm_offset:
-; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 16)
@@ -123,8 +134,9 @@ define <vscale x 4 x i32> @gld1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x
 
 define <vscale x 2 x i64> @gld1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sb_d_imm_offset:
-; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 16)
@@ -135,8 +147,9 @@ define <vscale x 2 x i64> @gld1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 ; LD1SH
 define <vscale x 4 x i32> @gld1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1sh_s_imm_offset:
-; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 16)
@@ -146,8 +159,9 @@ define <vscale x 4 x i32> @gld1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x
 
 define <vscale x 2 x i64> @gld1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sh_d_imm_offset:
-; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -158,8 +172,9 @@ define <vscale x 2 x i64> @gld1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 ; LD1SW
 define <vscale x 2 x i64> @gld1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sw_d_imm_offset:
-; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 16)
@@ -175,9 +190,10 @@ define <vscale x 2 x i64> @gld1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x
 ; LD1B
 define <vscale x 4 x i32> @gld1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1b_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 32)
@@ -187,9 +203,10 @@ define <vscale x 4 x i32> @gld1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @gld1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1b_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 32)
@@ -200,9 +217,10 @@ define <vscale x 2 x i64> @gld1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg
 ; LD1H
 define <vscale x 4 x i32> @gld1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1h_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 63)
@@ -212,9 +230,10 @@ define <vscale x 4 x i32> @gld1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @gld1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1h_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 63)
@@ -225,9 +244,10 @@ define <vscale x 2 x i64> @gld1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg
 ; LD1W
 define <vscale x 4 x i32> @gld1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1w_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 125)
@@ -236,9 +256,10 @@ define <vscale x 4 x i32> @gld1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @gld1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1w_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 125)
@@ -248,9 +269,10 @@ define <vscale x 2 x i64> @gld1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg
 
 define <vscale x 4 x float> @gld1w_s_imm_offset_out_of_range_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1w_s_imm_offset_out_of_range_float:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 125)
@@ -260,9 +282,10 @@ define <vscale x 4 x float> @gld1w_s_imm_offset_out_of_range_float(<vscale x 4 x
 ; LD1D
 define <vscale x 2 x i64> @gld1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1d_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 249)
@@ -271,9 +294,10 @@ define <vscale x 2 x i64> @gld1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg
 
 define <vscale x 2 x double> @gld1d_d_imm_offset_out_of_range_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1d_d_imm_offset_out_of_range_double:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                 <vscale x 2 x i64> %base,
                                                                                                 i64 249)
@@ -288,9 +312,10 @@ define <vscale x 2 x double> @gld1d_d_imm_offset_out_of_range_double(<vscale x 2
 ; LD1SB
 define <vscale x 4 x i32> @gld1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1sb_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 32)
@@ -300,9 +325,10 @@ define <vscale x 4 x i32> @gld1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %p
 
 define <vscale x 2 x i64> @gld1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sb_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 32)
@@ -313,9 +339,10 @@ define <vscale x 2 x i64> @gld1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %p
 ; LD1SH
 define <vscale x 4 x i32> @gld1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: gld1sh_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 63)
@@ -325,9 +352,10 @@ define <vscale x 4 x i32> @gld1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %p
 
 define <vscale x 2 x i64> @gld1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sh_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 63)
@@ -338,9 +366,10 @@ define <vscale x 2 x i64> @gld1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %p
 ; LD1SW
 define <vscale x 2 x i64> @gld1sw_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: gld1sw_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 125)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll
index 3d84c0bbfc719..5364b2da52fdf 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; LD1B
 define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1b_s_scalar_offset:
-; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 %offset)
@@ -19,8 +21,9 @@ define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1b_d_scalar_offset:
-; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 %offset)
@@ -31,8 +34,9 @@ define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 ; LD1H
 define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1h_s_scalar_offset:
-; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 %offset)
@@ -42,8 +46,9 @@ define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1h_d_scalar_offset:
-; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -54,8 +59,9 @@ define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 ; LD1W
 define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1w_s_scalar_offset:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 %offset)
@@ -64,8 +70,9 @@ define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1w_d_scalar_offset:
-; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -75,8 +82,9 @@ define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 
 define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1w_s_scalar_offset_float:
-; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
                                                                                                i64 %offset)
@@ -86,8 +94,9 @@ define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg,
 ; LD1D
 define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1d_d_scalar_offset:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -96,8 +105,9 @@ define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 
 define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1d_d_scalar_offset_double:
-; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                 <vscale x 2 x i64> %base,
                                                                                                 i64 %offset)
@@ -111,8 +121,9 @@ define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg
 ; LD1SB
 define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1sb_s_scalar_offset:
-; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                            <vscale x 4 x i32> %base,
                                                                                            i64 %offset)
@@ -122,8 +133,9 @@ define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1sb_d_scalar_offset:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                            <vscale x 2 x i64> %base,
                                                                                            i64 %offset)
@@ -134,8 +146,9 @@ define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 ; LD1SH
 define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gld1sh_s_scalar_offset:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
                                                                                              i64 %offset)
@@ -145,8 +158,9 @@ define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1sh_d_scalar_offset:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)
@@ -157,8 +171,9 @@ define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale
 ; LD1SW
 define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gld1sw_d_scalar_offset:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
                                                                                              i64 %offset)

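(For reference, and not part of the patch itself: the NOTE lines added above name the script that produces these assertions. A representative regeneration run — assuming it is invoked from an LLVM source tree with a built llc available on PATH — looks like:

  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

The script reads the test's RUN lines, re-runs llc on the file, and rewrites the CHECK/CHECK-NEXT blocks per function to match the current output, which is why the hand-written patterns are replaced wholesale and the --asm-verbose=false options are dropped from the RUN lines.)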
diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll
index 87e22fcf73fc2..8d27408ba9dd5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scalar-base-vector-indexes.ll
@@ -1,18 +1,21 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ; PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]    -> 32-bit indexes
 define void @llvm_aarch64_sve_prfb_gather_uxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_uxtw_index_nx4vi32:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfb_gather_scaled_sxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scaled_sxtw_index_nx4vi32:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, sxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
@@ -21,24 +24,27 @@ define void @llvm_aarch64_sve_prfb_gather_scaled_sxtw_index_nx4vi32(<vscale x 4
 
 define void @llvm_aarch64_sve_prfb_gather_uxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_uxtw_index_nx2vi64:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfb_gather_scaled_sxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scaled_sxtw_index_nx2vi64:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.d, sxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
 ; PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit indexes
 define void @llvm_aarch64_sve_prfb_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scaled_nx2vi64:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.d]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes, i32 1)
   ret void
  }
@@ -48,16 +54,18 @@ define void @llvm_aarch64_sve_prfb_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg,
 ; PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]    -> 32-bit indexes
 define void @llvm_aarch64_sve_prfh_gather_uxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_uxtw_index_nx4vi32:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [x0, z0.s, uxtw #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [x0, z0.s, uxtw #1]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx4vi32:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [x0, z0.s, sxtw #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [x0, z0.s, sxtw #1]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
@@ -65,16 +73,18 @@ define void @llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx4vi32(<vscale x 4
 ; PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] -> 32-bit unpacked indexes
 define void @llvm_aarch64_sve_prfh_gather_uxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_uxtw_index_nx2vi64:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [x0, z0.d, uxtw #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [x0, z0.d, uxtw #1]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx2vi64:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [x0, z0.d, sxtw #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [x0, z0.d, sxtw #1]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
@@ -82,8 +92,9 @@ define void @llvm_aarch64_sve_prfh_gather_scaled_sxtw_index_nx2vi64(<vscale x 2
 ; PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit indexes
 define void @llvm_aarch64_sve_prfh_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scaled_nx2vi64:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [x0, z0.d, lsl #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [x0, z0.d, lsl #1]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes, i32 1)
   ret void
  }
@@ -93,16 +104,18 @@ define void @llvm_aarch64_sve_prfh_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg,
 ; PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]    -> 32-bit indexes
 define void @llvm_aarch64_sve_prfw_gather_uxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_uxtw_index_nx4vi32:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [x0, z0.s, uxtw #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [x0, z0.s, uxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx4vi32:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [x0, z0.s, sxtw #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [x0, z0.s, sxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
@@ -110,16 +123,18 @@ define void @llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx4vi32(<vscale x 4
 ; PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] -> 32-bit unpacked indexes
 define void @llvm_aarch64_sve_prfw_gather_uxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_uxtw_index_nx2vi64:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [x0, z0.d, uxtw #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [x0, z0.d, uxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx2vi64:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [x0, z0.d, sxtw #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [x0, z0.d, sxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
@@ -127,8 +142,9 @@ define void @llvm_aarch64_sve_prfw_gather_scaled_sxtw_index_nx2vi64(<vscale x 2
 ; PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit indexes
 define void @llvm_aarch64_sve_prfw_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scaled_nx2vi64:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [x0, z0.d, lsl #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [x0, z0.d, lsl #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes, i32 1)
   ret void
  }
@@ -138,16 +154,18 @@ define void @llvm_aarch64_sve_prfw_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg,
 ; PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]    -> 32-bit indexes
 define void @llvm_aarch64_sve_prfd_gather_uxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_uxtw_index_nx4vi32:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [x0, z0.s, uxtw #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [x0, z0.s, uxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx4vi32:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [x0, z0.s, sxtw #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [x0, z0.s, sxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nx4vi32(<vscale x 4 x i1> %Pg, i8* %base, <vscale x 4 x i32> %indexes, i32 1)
   ret void
  }
@@ -155,16 +173,18 @@ define void @llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx4vi32(<vscale x 4
 ; PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] -> 32-bit unpacked indexes
 define void @llvm_aarch64_sve_prfd_gather_uxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_uxtw_index_nx2vi64:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [x0, z0.d, uxtw #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [x0, z0.d, uxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
 
 define void @llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx2vi64:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [x0, z0.d, sxtw #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [x0, z0.d, sxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i32> %indexes, i32 1)
   ret void
  }
@@ -172,8 +192,9 @@ define void @llvm_aarch64_sve_prfd_gather_scaled_sxtw_index_nx2vi64(<vscale x 2
 ; PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit indexes
 define void @llvm_aarch64_sve_prfd_gather_scaled_nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scaled_nx2vi64:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [x0, z0.d, lsl #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [x0, z0.d, lsl #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.index.nx2vi64(<vscale x 2 x i1> %Pg, i8* %base, <vscale x 2 x i64> %indexes, i32 1)
   ret void
  }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
index 6d745eba95b6d..d8f34c42945d3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ; PRFB <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [z0.s, #7]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [z0.s, #7]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 7, i32 1)
   ret void
 }
@@ -12,8 +14,9 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32(<vscale x 4 x i3
 ; PRFB <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [z0.d, #7]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [z0.d, #7]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 7, i32 1)
   ret void
 }
@@ -21,8 +24,9 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64(<vscale x 2 x i6
 ; PRFH <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [z0.s, #6]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [z0.s, #6]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 6, i32 1)
   ret void
 }
@@ -30,8 +34,9 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32(<vscale x 4 x i3
 ; PRFH <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64:
-; CHECK-NEXT:  prfh  pldl1strm, p0, [z0.d, #6]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfh pldl1strm, p0, [z0.d, #6]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 6, i32 1)
   ret void
 }
@@ -39,8 +44,9 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64(<vscale x 2 x i6
 ; PRFW <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [z0.s, #12]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [z0.s, #12]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 12, i32 1)
   ret void
 }
@@ -48,8 +54,9 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32(<vscale x 4 x i3
 ; PRFW <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64:
-; CHECK-NEXT:  prfw  pldl1strm, p0, [z0.d, #12]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfw pldl1strm, p0, [z0.d, #12]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 12, i32 1)
   ret void
 }
@@ -57,8 +64,9 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64(<vscale x 2 x i6
 ; PRFD <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [z0.s, #16]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [z0.s, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 16, i32 1)
   ret void
 }
@@ -66,8 +74,9 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32(<vscale x 4 x i3
 ; PRFD <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64:
-; CHECK-NEXT:  prfd  pldl1strm, p0, [z0.d, #16]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfd pldl1strm, p0, [z0.d, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 16, i32 1)
   ret void
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll
index 91da037196700..b9c1d9a42a339 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll
@@ -1,28 +1,32 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ; PRFB <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 1, ..., 31
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #32
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 32, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
 }
@@ -30,26 +34,29 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFB <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 1, ..., 31
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT:   prfb pldl1strm, p0, [x0, z0.d, uxtw]
-; CHECK-NEXT:   ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #32
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 32, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
 }
@@ -59,35 +66,39 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFH <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 2, ..., 62
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #63
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 63, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
 }
@@ -95,35 +106,39 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFH <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 2, ..., 62
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT:  prfb pldl1strm, p0, [x0, z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #63
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 63, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void
 }
@@ -133,35 +148,39 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFW <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 4, ..., 124
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #125
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
 }
@@ -169,35 +188,39 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFW <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 4, ..., 124
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT:   prfb pldl1strm, p0, [x0, z0.d, uxtw]
-; CHECK-NEXT:   ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #125
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void
 }
@@ -207,35 +230,39 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFD <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 8, ..., 248
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x0, z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #125
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
 }
@@ -243,35 +270,39 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFD <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 8, ..., 248
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT:   prfb pldl1strm, p0, [x0, z0.d, uxtw]
-; CHECK-NEXT:   ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #125
-; CHECK-NEXT:  prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
-; CHECK-NEXT:  mov   x[[N:[0-9]+]], #-1
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
 }
 
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8:
-; CHECK-NEXT:  mov   w[[N:[0-9]+]], #33
-; CHECK-NEXT:  prfb  pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #33
+; CHECK-NEXT:    prfb pldl1strm, p0, [x8, z0.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
index 6ad5afe9f430c..f576d6079e821 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s
 
 ; All these tests create a vector tuple, insert z5 into one of the elements,
 ; and finally extract that element from the wide vector to return it.  These
@@ -12,12 +13,11 @@
 ; tuple:      { tuple2.res0, tuple2.res1 }
 ; insert z5:  {     z5     , tuple2.res1 }
 ; extract z5:       ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
   %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
@@ -27,12 +27,11 @@ define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vsca
 ; tuple:       { tuple2.res0, tuple2.res1 }
 ; insert z5:   { tuple2.res0,     z5      }
 ; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
   %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
@@ -44,11 +43,10 @@ define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vsca
 ; tuple:       { tuple2.res0, tuple2.res1 }
 ; insert z5:   { tuple2.res0,     z5      }
 ; extract z0:         ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                            <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                            <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
   %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
@@ -56,10 +54,11 @@ define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %
 }
 
 ; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
-  ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
-  ; CHECK-NEXT:  mov     z0.d, z1.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
+; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
   ret <vscale x 4 x i32> %ext
 }
@@ -71,12 +70,11 @@ define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0
 ; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
 ; insert z5:   {     z5     , tuple3.res1, tuple3.res2 }
 ; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
@@ -86,12 +84,11 @@ define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
 ; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
 ; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
@@ -101,12 +98,11 @@ define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
 ; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
 ; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
@@ -118,12 +114,11 @@ define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
 ; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
 ; extract z2:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
-  ; CHECK-NEXT:  mov     z0.d, z2.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
   %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
@@ -131,10 +126,11 @@ define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32>
 }
 
 ; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
-  ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
-  ; CHECK-NEXT:  mov     z0.d, z3.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) {
+; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
   ret <vscale x 4 x i32> %ext
 }
@@ -146,12 +142,11 @@ define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
 ; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
 ; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
@@ -161,12 +156,11 @@ define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
 ; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
 ; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
@@ -176,12 +170,11 @@ define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
 ; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
 ; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
@@ -191,12 +184,11 @@ define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
 ; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
 ; extract z5:                                               ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
-  ; CHECK-NEXT:  mov     z0.d, z5.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z5.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
@@ -208,12 +200,11 @@ define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vsc
 ; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
 ; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
 ; extract z2:                                               ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
-                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
-                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
-  ; CHECK-NEXT:  mov     z0.d, z2.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
+; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
   %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
@@ -221,16 +212,15 @@ define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32>
 }
 
 ; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
-  ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
-  ; CHECK-NEXT:  mov     z0.d, z3.d
-  ; CHECK-NEXT:  ret
+define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) {
+; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z3.d
+; CHECK-NEXT:    ret
   %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
   ret <vscale x 4 x i32> %ext
 }
 
-attributes #0 = { nounwind }
-
 declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
index e1f5ff448dd60..cefa3f9d825a3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -9,9 +10,11 @@
 ;
 
 define <vscale x 16 x i1> @ir_cmpeq_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmpeq_b
-; CHECK: cmpeq p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpeq_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp eq <vscale x 16 x i8> %a, %splat
@@ -19,9 +22,10 @@ define <vscale x 16 x i1> @ir_cmpeq_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmpeq_b
-; CHECK: cmpeq p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpeq_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg,
@@ -31,9 +35,10 @@ define <vscale x 16 x i1> @int_cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmpeq_b
-; CHECK: cmpeq p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpeq_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -43,9 +48,11 @@ define <vscale x 16 x i1> @wide_cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmpeq_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmpeq_h
-; CHECK: cmpeq p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpeq_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp eq <vscale x 8 x i16> %a, %splat
@@ -53,9 +60,10 @@ define <vscale x 8 x i1> @ir_cmpeq_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmpeq_h
-; CHECK: cmpeq p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpeq_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.nxv8i16(<vscale x 8 x i1> %pg,
@@ -65,9 +73,10 @@ define <vscale x 8 x i1> @int_cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmpeq_h
-; CHECK: cmpeq p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpeq_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -77,9 +86,11 @@ define <vscale x 8 x i1> @wide_cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmpeq_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmpeq_s
-; CHECK: cmpeq p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpeq_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp eq <vscale x 4 x i32> %a, %splat
@@ -87,9 +98,10 @@ define <vscale x 4 x i1> @ir_cmpeq_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmpeq_s
-; CHECK: cmpeq p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpeq_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg,
@@ -99,9 +111,10 @@ define <vscale x 4 x i1> @int_cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmpeq_s
-; CHECK: cmpeq p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpeq_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -111,9 +124,11 @@ define <vscale x 4 x i1> @wide_cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmpeq_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmpeq_d
-; CHECK: cmpeq p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpeq_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp eq <vscale x 2 x i64> %a, %splat
@@ -121,9 +136,10 @@ define <vscale x 2 x i1> @ir_cmpeq_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmpeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmpeq_d
-; CHECK: cmpeq p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpeq_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpeq.nxv2i64(<vscale x 2 x i1> %pg,
@@ -137,9 +153,11 @@ define <vscale x 2 x i1> @int_cmpeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmpge_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmpge_b
-; CHECK: cmpge p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpge_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp sge <vscale x 16 x i8> %a, %splat
@@ -147,9 +165,10 @@ define <vscale x 16 x i1> @ir_cmpge_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmpge_b
-; CHECK: cmpge p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpge_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg,
@@ -159,9 +178,10 @@ define <vscale x 16 x i1> @int_cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmpge_b
-; CHECK: cmpge p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpge_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -171,9 +191,11 @@ define <vscale x 16 x i1> @wide_cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmpge_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmpge_h
-; CHECK: cmpge p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpge_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp sge <vscale x 8 x i16> %a, %splat
@@ -181,9 +203,10 @@ define <vscale x 8 x i1> @ir_cmpge_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmpge_h
-; CHECK: cmpge p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpge_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.nxv8i16(<vscale x 8 x i1> %pg,
@@ -193,9 +216,10 @@ define <vscale x 8 x i1> @int_cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmpge_h
-; CHECK: cmpge p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpge_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -205,9 +229,11 @@ define <vscale x 8 x i1> @wide_cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmpge_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmpge_s
-; CHECK: cmpge p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpge_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp sge <vscale x 4 x i32> %a, %splat
@@ -215,9 +241,10 @@ define <vscale x 4 x i1> @ir_cmpge_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmpge_s
-; CHECK: cmpge p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpge_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg,
@@ -227,9 +254,10 @@ define <vscale x 4 x i1> @int_cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmpge_s
-; CHECK: cmpge p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpge_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -239,9 +267,11 @@ define <vscale x 4 x i1> @wide_cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmpge_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmpge_d
-; CHECK: cmpge p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpge_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpge p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp sge <vscale x 2 x i64> %a, %splat
@@ -249,9 +279,10 @@ define <vscale x 2 x i1> @ir_cmpge_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmpge_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmpge_d
-; CHECK: cmpge p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpge_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpge.nxv2i64(<vscale x 2 x i1> %pg,
@@ -265,9 +296,11 @@ define <vscale x 2 x i1> @int_cmpge_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmpgt_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmpgt_b
-; CHECK: cmpgt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpgt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp sgt <vscale x 16 x i8> %a, %splat
@@ -275,9 +308,10 @@ define <vscale x 16 x i1> @ir_cmpgt_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmpgt_b
-; CHECK: cmpgt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpgt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg,
@@ -287,9 +321,10 @@ define <vscale x 16 x i1> @int_cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmpgt_b
-; CHECK: cmpgt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpgt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -299,9 +334,11 @@ define <vscale x 16 x i1> @wide_cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmpgt_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmpgt_h
-; CHECK: cmpgt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpgt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp sgt <vscale x 8 x i16> %a, %splat
@@ -309,9 +346,10 @@ define <vscale x 8 x i1> @ir_cmpgt_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmpgt_h
-; CHECK: cmpgt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpgt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1> %pg,
@@ -321,9 +359,10 @@ define <vscale x 8 x i1> @int_cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmpgt_h
-; CHECK: cmpgt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpgt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -333,9 +372,11 @@ define <vscale x 8 x i1> @wide_cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmpgt_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmpgt_s
-; CHECK: cmpgt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpgt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp sgt <vscale x 4 x i32> %a, %splat
@@ -343,9 +384,10 @@ define <vscale x 4 x i1> @ir_cmpgt_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmpgt_s
-; CHECK: cmpgt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpgt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg,
@@ -355,9 +397,10 @@ define <vscale x 4 x i1> @int_cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmpgt_s
-; CHECK: cmpgt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpgt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -367,9 +410,11 @@ define <vscale x 4 x i1> @wide_cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmpgt_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmpgt_d
-; CHECK: cmpgt p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpgt_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpgt p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp sgt <vscale x 2 x i64> %a, %splat
@@ -377,9 +422,10 @@ define <vscale x 2 x i1> @ir_cmpgt_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmpgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmpgt_d
-; CHECK: cmpgt p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpgt_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpgt.nxv2i64(<vscale x 2 x i1> %pg,
@@ -393,9 +439,11 @@ define <vscale x 2 x i1> @int_cmpgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmple_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmple_b
-; CHECK: cmple p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmple_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp sle <vscale x 16 x i8> %a, %splat
@@ -403,9 +451,10 @@ define <vscale x 16 x i1> @ir_cmple_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmple_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmple_b
-; CHECK: cmple p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmple_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg,
@@ -415,9 +464,10 @@ define <vscale x 16 x i1> @int_cmple_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmple_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmple_b
-; CHECK: cmple p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmple_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -427,9 +477,11 @@ define <vscale x 16 x i1> @wide_cmple_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmple_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmple_h
-; CHECK: cmple p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmple_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp sle <vscale x 8 x i16> %a, %splat
@@ -437,9 +489,10 @@ define <vscale x 8 x i1> @ir_cmple_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmple_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmple_h
-; CHECK: cmple p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmple_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.nxv8i16(<vscale x 8 x i1> %pg,
@@ -449,9 +502,10 @@ define <vscale x 8 x i1> @int_cmple_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmple_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmple_h
-; CHECK: cmple p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmple_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -461,9 +515,11 @@ define <vscale x 8 x i1> @wide_cmple_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmple_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmple_s
-; CHECK: cmple p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmple_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp sle <vscale x 4 x i32> %a, %splat
@@ -471,9 +527,10 @@ define <vscale x 4 x i1> @ir_cmple_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmple_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmple_s
-; CHECK: cmple p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmple_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg,
@@ -483,9 +540,10 @@ define <vscale x 4 x i1> @int_cmple_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmple_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmple_s
-; CHECK: cmple p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmple_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -495,9 +553,11 @@ define <vscale x 4 x i1> @wide_cmple_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmple_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmple_d
-; CHECK: cmple p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmple_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmple p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp sle <vscale x 2 x i64> %a, %splat
@@ -505,9 +565,10 @@ define <vscale x 2 x i1> @ir_cmple_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmple_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmple_d
-; CHECK: cmple p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmple_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpge.nxv2i64(<vscale x 2 x i1> %pg,
@@ -521,9 +582,11 @@ define <vscale x 2 x i1> @int_cmple_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmplt_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmplt_b
-; CHECK: cmplt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp slt <vscale x 16 x i8> %a, %splat
@@ -531,9 +594,10 @@ define <vscale x 16 x i1> @ir_cmplt_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmplt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmplt_b
-; CHECK: cmplt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg,
@@ -543,9 +607,10 @@ define <vscale x 16 x i1> @int_cmplt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmplt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmplt_b
-; CHECK: cmplt p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmplt_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -555,9 +620,11 @@ define <vscale x 16 x i1> @wide_cmplt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmplt_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmplt_h
-; CHECK: cmplt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmplt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp slt <vscale x 8 x i16> %a, %splat
@@ -565,9 +632,10 @@ define <vscale x 8 x i1> @ir_cmplt_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmplt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmplt_h
-; CHECK: cmplt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1> %pg,
@@ -577,9 +645,10 @@ define <vscale x 8 x i1> @int_cmplt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmplt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmplt_h
-; CHECK: cmplt p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmplt_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -589,9 +658,11 @@ define <vscale x 8 x i1> @wide_cmplt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmplt_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmplt_s
-; CHECK: cmplt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmplt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp slt <vscale x 4 x i32> %a, %splat
@@ -599,9 +670,10 @@ define <vscale x 4 x i1> @ir_cmplt_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmplt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmplt_s
-; CHECK: cmplt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg,
@@ -611,9 +683,10 @@ define <vscale x 4 x i1> @int_cmplt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmplt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmplt_s
-; CHECK: cmplt p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmplt_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -623,9 +696,11 @@ define <vscale x 4 x i1> @wide_cmplt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmplt_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmplt_d
-; CHECK: cmplt p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplt_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmplt p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp slt <vscale x 2 x i64> %a, %splat
@@ -633,9 +708,10 @@ define <vscale x 2 x i1> @ir_cmplt_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmplt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmplt_d
-; CHECK: cmplt p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplt_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpgt.nxv2i64(<vscale x 2 x i1> %pg,
@@ -649,9 +725,11 @@ define <vscale x 2 x i1> @int_cmplt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmpne_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmpne_b
-; CHECK: cmpne p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpne_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp ne <vscale x 16 x i8> %a, %splat
@@ -659,9 +737,10 @@ define <vscale x 16 x i1> @ir_cmpne_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmpne_b
-; CHECK: cmpne p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpne_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg,
@@ -671,9 +750,10 @@ define <vscale x 16 x i1> @int_cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmpne_b
-; CHECK: cmpne p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpne_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -683,9 +763,11 @@ define <vscale x 16 x i1> @wide_cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmpne_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmpne_h
-; CHECK: cmpne p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpne_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp ne <vscale x 8 x i16> %a, %splat
@@ -693,9 +775,10 @@ define <vscale x 8 x i1> @ir_cmpne_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmpne_h
-; CHECK: cmpne p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpne_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 -16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %pg,
@@ -705,9 +788,10 @@ define <vscale x 8 x i1> @int_cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmpne_h
-; CHECK: cmpne p0.h, p0/z, z0.h, #-16
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpne_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -717,9 +801,11 @@ define <vscale x 8 x i1> @wide_cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmpne_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmpne_s
-; CHECK: cmpne p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpne_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp ne <vscale x 4 x i32> %a, %splat
@@ -727,9 +813,10 @@ define <vscale x 4 x i1> @ir_cmpne_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmpne_s
-; CHECK: cmpne p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpne_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 15, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg,
@@ -739,9 +826,10 @@ define <vscale x 4 x i1> @int_cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmpne_s
-; CHECK: cmpne p0.s, p0/z, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpne_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #15
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -751,9 +839,11 @@ define <vscale x 4 x i1> @wide_cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmpne_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmpne_d
-; CHECK: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpne_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp ne <vscale x 2 x i64> %a, %splat
@@ -761,9 +851,10 @@ define <vscale x 2 x i1> @ir_cmpne_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmpne_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmpne_d
-; CHECK: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpne_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %pg,
@@ -781,9 +872,11 @@ define <vscale x 2 x i1> @int_cmpne_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmphi_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmphi_b
-; CHECK: cmphi p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphi_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp ugt <vscale x 16 x i8> %a, %splat
@@ -791,9 +884,10 @@ define <vscale x 16 x i1> @ir_cmphi_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmphi_b
-; CHECK: cmphi p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphi_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg,
@@ -803,9 +897,10 @@ define <vscale x 16 x i1> @int_cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmphi_b
-; CHECK: cmphi p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphi_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -815,9 +910,11 @@ define <vscale x 16 x i1> @wide_cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmphi_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmphi_h
-; CHECK: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphi_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp ugt <vscale x 8 x i16> %a, %splat
@@ -825,9 +922,10 @@ define <vscale x 8 x i1> @ir_cmphi_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmphi_h
-; CHECK: cmphi p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphi_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.nxv8i16(<vscale x 8 x i1> %pg,
@@ -837,9 +935,10 @@ define <vscale x 8 x i1> @int_cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmphi_h
-; CHECK: cmphi p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphi_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -849,9 +948,11 @@ define <vscale x 8 x i1> @wide_cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmphi_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmphi_s
-; CHECK: cmphi p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphi_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp ugt <vscale x 4 x i32> %a, %splat
@@ -859,9 +960,10 @@ define <vscale x 4 x i1> @ir_cmphi_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmphi_s
-; CHECK: cmphi p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphi_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg,
@@ -871,9 +973,10 @@ define <vscale x 4 x i1> @int_cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmphi_s
-; CHECK: cmphi p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphi_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -883,9 +986,11 @@ define <vscale x 4 x i1> @wide_cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmphi_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmphi_d
-; CHECK: cmphi p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphi_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphi p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp ugt <vscale x 2 x i64> %a, %splat
@@ -893,9 +998,10 @@ define <vscale x 2 x i1> @ir_cmphi_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmphi_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmphi_d
-; CHECK: cmphi p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphi_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphi.nxv2i64(<vscale x 2 x i1> %pg,
@@ -909,9 +1015,11 @@ define <vscale x 2 x i1> @int_cmphi_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmphs_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmphs_b
-; CHECK: cmphs p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphs_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp uge <vscale x 16 x i8> %a, %splat
@@ -919,9 +1027,10 @@ define <vscale x 16 x i1> @ir_cmphs_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmphs_b
-; CHECK: cmphs p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphs_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg,
@@ -931,9 +1040,10 @@ define <vscale x 16 x i1> @int_cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmphs_b
-; CHECK: cmphs p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphs_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -943,9 +1053,10 @@ define <vscale x 16 x i1> @wide_cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmphs_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmphs_h
-; CHECK: ptrue p0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphs_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp uge <vscale x 8 x i16> %a, %splat
@@ -953,9 +1064,10 @@ define <vscale x 8 x i1> @ir_cmphs_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmphs_h
-; CHECK: cmphs p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphs_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.nxv8i16(<vscale x 8 x i1> %pg,
@@ -965,9 +1077,10 @@ define <vscale x 8 x i1> @int_cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmphs_h
-; CHECK: cmphs p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphs_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -977,9 +1090,11 @@ define <vscale x 8 x i1> @wide_cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmphs_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmphs_s
-; CHECK: cmphs p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphs_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp uge <vscale x 4 x i32> %a, %splat
@@ -987,9 +1102,10 @@ define <vscale x 4 x i1> @ir_cmphs_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmphs_s
-; CHECK: cmphs p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphs_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg,
@@ -999,9 +1115,10 @@ define <vscale x 4 x i1> @int_cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmphs_s
-; CHECK: cmphs p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmphs_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1011,9 +1128,11 @@ define <vscale x 4 x i1> @wide_cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmphs_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmphs_d
-; CHECK: cmphs p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmphs_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphs p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp uge <vscale x 2 x i64> %a, %splat
@@ -1021,9 +1140,10 @@ define <vscale x 2 x i1> @ir_cmphs_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmphs_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmphs_d
-; CHECK: cmphs p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmphs_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphs.nxv2i64(<vscale x 2 x i1> %pg,
@@ -1037,9 +1157,11 @@ define <vscale x 2 x i1> @int_cmphs_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmplo_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmplo_b
-; CHECK: cmplo p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplo_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmplo p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp ult <vscale x 16 x i8> %a, %splat
@@ -1047,9 +1169,10 @@ define <vscale x 16 x i1> @ir_cmplo_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmplo_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmplo_b
-; CHECK: cmplo p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplo_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1059,9 +1182,10 @@ define <vscale x 16 x i1> @int_cmplo_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmplo_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmplo_b
-; CHECK: cmplo p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmplo_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1071,20 +1195,23 @@ define <vscale x 16 x i1> @wide_cmplo_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmplo_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmplo_h
-; CHECK: pfalse p0.b
-; CHECK-NEXT: ret
-  %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
+; CHECK-LABEL: ir_cmplo_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmplo p0.h, p0/z, z0.h, #2
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x i16> undef, i16 2, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp ult <vscale x 8 x i16> %a, %splat
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @int_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmplo_h
-; CHECK: cmplo p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
-  %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
+; CHECK-LABEL: int_cmplo_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.h, p0/z, z0.h, #3
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %splat,
@@ -1093,10 +1220,11 @@ define <vscale x 8 x i1> @int_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmplo_h
-; CHECK: cmplo p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
-  %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
+; CHECK-LABEL: wide_cmplo_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.h, p0/z, z0.h, #4
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
@@ -1105,9 +1233,11 @@ define <vscale x 8 x i1> @wide_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmplo_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmplo_s
-; CHECK: cmplo p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplo_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmplo p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp ult <vscale x 4 x i32> %a, %splat
@@ -1115,9 +1245,10 @@ define <vscale x 4 x i1> @ir_cmplo_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmplo_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmplo_s
-; CHECK: cmplo p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplo_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1127,9 +1258,10 @@ define <vscale x 4 x i1> @int_cmplo_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmplo_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmplo_s
-; CHECK: cmplo p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmplo_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1139,9 +1271,11 @@ define <vscale x 4 x i1> @wide_cmplo_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmplo_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmplo_d
-; CHECK: cmplo p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmplo_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmplo p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp ult <vscale x 2 x i64> %a, %splat
@@ -1149,9 +1283,10 @@ define <vscale x 2 x i1> @ir_cmplo_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmplo_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmplo_d
-; CHECK: cmplo p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmplo_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphi.nxv2i64(<vscale x 2 x i1> %pg,
@@ -1165,9 +1300,11 @@ define <vscale x 2 x i1> @int_cmplo_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 
 define <vscale x 16 x i1> @ir_cmpls_b(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: ir_cmpls_b
-; CHECK: cmpls p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpls_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpls p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = icmp ule <vscale x 16 x i8> %a, %splat
@@ -1175,9 +1312,10 @@ define <vscale x 16 x i1> @ir_cmpls_b(<vscale x 16 x i8> %a) {
 }
 
 define <vscale x 16 x i1> @int_cmpls_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: int_cmpls_b
-; CHECK: cmpls p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpls_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 16 x i8> undef, i8 4, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1187,9 +1325,10 @@ define <vscale x 16 x i1> @int_cmpls_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 }
 
 define <vscale x 16 x i1> @wide_cmpls_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: wide_cmpls_b
-; CHECK: cmpls p0.b, p0/z, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpls_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.b, p0/z, z0.b, #4
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1199,9 +1338,11 @@ define <vscale x 16 x i1> @wide_cmpls_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 }
 
 define <vscale x 8 x i1> @ir_cmpls_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: ir_cmpls_h
-; CHECK: cmpls p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpls_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpls p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = icmp ule <vscale x 8 x i16> %a, %splat
@@ -1209,9 +1350,10 @@ define <vscale x 8 x i1> @ir_cmpls_h(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i1> @int_cmpls_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: int_cmpls_h
-; CHECK: cmpls p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpls_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.nxv8i16(<vscale x 8 x i1> %pg,
@@ -1221,9 +1363,10 @@ define <vscale x 8 x i1> @int_cmpls_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 8 x i1> @wide_cmpls_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: wide_cmpls_h
-; CHECK: cmpls p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpls_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1> %pg,
@@ -1233,9 +1376,11 @@ define <vscale x 8 x i1> @wide_cmpls_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 }
 
 define <vscale x 4 x i1> @ir_cmpls_s(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: ir_cmpls_s
-; CHECK: cmpls p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpls_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpls p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = icmp ule <vscale x 4 x i32> %a, %splat
@@ -1243,9 +1388,10 @@ define <vscale x 4 x i1> @ir_cmpls_s(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i1> @int_cmpls_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: int_cmpls_s
-; CHECK: cmpls p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpls_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 4 x i32> undef, i32 68, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1255,9 +1401,10 @@ define <vscale x 4 x i1> @int_cmpls_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 4 x i1> @wide_cmpls_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: wide_cmpls_s
-; CHECK: cmpls p0.s, p0/z, z0.s, #68
-; CHECK-NEXT: ret
+; CHECK-LABEL: wide_cmpls_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.s, p0/z, z0.s, #68
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1267,9 +1414,11 @@ define <vscale x 4 x i1> @wide_cmpls_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 }
 
 define <vscale x 2 x i1> @ir_cmpls_d(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: ir_cmpls_d
-; CHECK: cmpls p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: ir_cmpls_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpls p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = icmp ule <vscale x 2 x i64> %a, %splat
@@ -1277,9 +1426,10 @@ define <vscale x 2 x i1> @ir_cmpls_d(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i1> @int_cmpls_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: int_cmpls_d
-; CHECK: cmpls p0.d, p0/z, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK-LABEL: int_cmpls_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.d, p0/z, z0.d, #127
+; CHECK-NEXT:    ret
   %elt   = insertelement <vscale x 2 x i64> undef, i64 127, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphs.nxv2i64(<vscale x 2 x i1> %pg,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
index 9dc807462d913..702249e4f606f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i1> @cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpeq_b:
-; CHECK: cmpeq p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -16,8 +18,9 @@ define <vscale x 16 x i1> @cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpeq_h:
-; CHECK: cmpeq p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -26,8 +29,9 @@ define <vscale x 8 x i1> @cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpeq_s:
-; CHECK: cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -36,8 +40,9 @@ define <vscale x 4 x i1> @cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmpeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_d:
-; CHECK: cmpeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpeq.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -46,8 +51,9 @@ define <vscale x 2 x i1> @cmpeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmpeq_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_wide_b:
-; CHECK: cmpeq p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -56,8 +62,9 @@ define <vscale x 16 x i1> @cmpeq_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmpeq_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_wide_h:
-; CHECK: cmpeq p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -66,8 +73,9 @@ define <vscale x 8 x i1> @cmpeq_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmpeq_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_wide_s:
-; CHECK: cmpeq p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -76,36 +84,40 @@ define <vscale x 4 x i1> @cmpeq_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpeq_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpeq_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp eq <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpeq_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpeq_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp eq <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpeq_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpeq_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp eq <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpeq_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp eq <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -116,8 +128,9 @@ define <vscale x 2 x i1> @cmpeq_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i1> @cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpge_b:
-; CHECK: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -126,8 +139,9 @@ define <vscale x 16 x i1> @cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpge_h:
-; CHECK: cmpge p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -136,8 +150,9 @@ define <vscale x 8 x i1> @cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpge_s:
-; CHECK: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -146,8 +161,9 @@ define <vscale x 4 x i1> @cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmpge_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_d:
-; CHECK: cmpge p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpge.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -156,8 +172,9 @@ define <vscale x 2 x i1> @cmpge_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmpge_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_wide_b:
-; CHECK: cmpge p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -166,8 +183,9 @@ define <vscale x 16 x i1> @cmpge_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmpge_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_wide_h:
-; CHECK: cmpge p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -176,8 +194,9 @@ define <vscale x 8 x i1> @cmpge_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmpge_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_wide_s:
-; CHECK: cmpge p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -186,72 +205,80 @@ define <vscale x 4 x i1> @cmpge_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpge_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpge_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp sge <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpge_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpge_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp sge <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpge_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpge_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp sge <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpge_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpge p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpge p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp sge <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 16 x i1> @cmpge_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpge_ir_comm_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpge p0.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpge p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT:    ret
   %out = icmp sle <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpge_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpge_ir_comm_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpge p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT:    ret
   %out = icmp sle <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpge_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpge_ir_comm_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpge p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
   %out = icmp sle <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpge_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_ir_comm_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpge p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpge p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT:    ret
   %out = icmp sle <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -262,8 +289,9 @@ define <vscale x 2 x i1> @cmpge_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 16 x i1> @cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpgt_b:
-; CHECK: cmpgt p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -272,8 +300,9 @@ define <vscale x 16 x i1> @cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpgt_h:
-; CHECK: cmpgt p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -282,8 +311,9 @@ define <vscale x 8 x i1> @cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpgt_s:
-; CHECK: cmpgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -292,8 +322,9 @@ define <vscale x 4 x i1> @cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmpgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_d:
-; CHECK: cmpgt p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpgt.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -302,8 +333,9 @@ define <vscale x 2 x i1> @cmpgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmpgt_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_wide_b:
-; CHECK: cmpgt p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -312,8 +344,9 @@ define <vscale x 16 x i1> @cmpgt_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmpgt_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_wide_h:
-; CHECK: cmpgt p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -322,8 +355,9 @@ define <vscale x 8 x i1> @cmpgt_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmpgt_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_wide_s:
-; CHECK: cmpgt p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -332,72 +366,80 @@ define <vscale x 4 x i1> @cmpgt_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpgt_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpgt_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp sgt <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpgt_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpgt_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp sgt <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpgt_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpgt_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp sgt <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpgt_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpgt p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp sgt <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 16 x i1> @cmpgt_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpgt_ir_comm_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT:    ret
   %out = icmp slt <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpgt_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpgt_ir_comm_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT:    ret
   %out = icmp slt <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpgt_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpgt_ir_comm_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
   %out = icmp slt <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpgt_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_ir_comm_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpgt p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT:    ret
   %out = icmp slt <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -408,8 +450,9 @@ define <vscale x 2 x i1> @cmpgt_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 16 x i1> @cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphi_b:
-; CHECK: cmphi p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -418,8 +461,9 @@ define <vscale x 16 x i1> @cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphi_h:
-; CHECK: cmphi p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -428,8 +472,9 @@ define <vscale x 8 x i1> @cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphi_s:
-; CHECK: cmphi p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -438,8 +483,9 @@ define <vscale x 4 x i1> @cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmphi_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_d:
-; CHECK: cmphi p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphi.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -448,8 +494,9 @@ define <vscale x 2 x i1> @cmphi_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmphi_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_wide_b:
-; CHECK: cmphi p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -458,8 +505,9 @@ define <vscale x 16 x i1> @cmphi_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmphi_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_wide_h:
-; CHECK: cmphi p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -468,8 +516,9 @@ define <vscale x 8 x i1> @cmphi_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmphi_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_wide_s:
-; CHECK: cmphi p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -478,72 +527,80 @@ define <vscale x 4 x i1> @cmphi_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmphi_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphi_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp ugt <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmphi_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphi_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp ugt <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmphi_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphi_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp ugt <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmphi_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphi p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp ugt <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 16 x i1> @cmphi_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphi_ir_comm_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT:    ret
   %out = icmp ult <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmphi_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphi_ir_comm_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT:    ret
   %out = icmp ult <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmphi_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphi_ir_comm_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
   %out = icmp ult <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmphi_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_ir_comm_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphi p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT:    ret
   %out = icmp ult <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -554,8 +611,9 @@ define <vscale x 2 x i1> @cmphi_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 16 x i1> @cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphs_b:
-; CHECK: cmphs p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -564,8 +622,9 @@ define <vscale x 16 x i1> @cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphs_h:
-; CHECK: cmphs p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -574,8 +633,9 @@ define <vscale x 8 x i1> @cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphs_s:
-; CHECK: cmphs p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -584,8 +644,9 @@ define <vscale x 4 x i1> @cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmphs_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_d:
-; CHECK: cmphs p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmphs.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -594,8 +655,9 @@ define <vscale x 2 x i1> @cmphs_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmphs_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_wide_b:
-; CHECK: cmphs p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -604,8 +666,9 @@ define <vscale x 16 x i1> @cmphs_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmphs_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_wide_h:
-; CHECK: cmphs p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -614,8 +677,9 @@ define <vscale x 8 x i1> @cmphs_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmphs_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_wide_s:
-; CHECK: cmphs p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -624,72 +688,80 @@ define <vscale x 4 x i1> @cmphs_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmphs_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphs_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp uge <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmphs_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphs_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp uge <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmphs_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphs_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp uge <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmphs_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmphs p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphs p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp uge <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 16 x i1> @cmphs_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmphs_ir_comm_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmphs p0.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphs p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT:    ret
   %out = icmp ule <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmphs_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmphs_ir_comm_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmphs p0.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmphs p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT:    ret
   %out = icmp ule <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmphs_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphs_ir_comm_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmphs p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphs p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
   %out = icmp ule <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmphs_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_ir_comm_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmphs p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmphs p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT:    ret
   %out = icmp ule <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -700,8 +772,9 @@ define <vscale x 2 x i1> @cmphs_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i
 
 define <vscale x 16 x i1> @cmple_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmple_wide_b:
-; CHECK: cmple p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -710,8 +783,9 @@ define <vscale x 16 x i1> @cmple_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmple_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmple_wide_h:
-; CHECK: cmple p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -720,8 +794,9 @@ define <vscale x 8 x i1> @cmple_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmple_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmple_wide_s:
-; CHECK: cmple p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -734,8 +809,9 @@ define <vscale x 4 x i1> @cmple_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmplo_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplo_wide_b:
-; CHECK: cmplo p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -744,8 +820,9 @@ define <vscale x 16 x i1> @cmplo_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmplo_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplo_wide_h:
-; CHECK: cmplo p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -754,8 +831,9 @@ define <vscale x 8 x i1> @cmplo_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmplo_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplo_wide_s:
-; CHECK: cmplo p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplo p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -768,8 +846,9 @@ define <vscale x 4 x i1> @cmplo_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpls_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpls_wide_b:
-; CHECK: cmpls p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -778,8 +857,9 @@ define <vscale x 16 x i1> @cmpls_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmpls_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpls_wide_h:
-; CHECK: cmpls p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -788,8 +868,9 @@ define <vscale x 8 x i1> @cmpls_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmpls_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpls_wide_s:
-; CHECK: cmpls p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpls p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -802,8 +883,9 @@ define <vscale x 4 x i1> @cmpls_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmplt_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplt_wide_b:
-; CHECK: cmplt p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -812,8 +894,9 @@ define <vscale x 16 x i1> @cmplt_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmplt_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplt_wide_h:
-; CHECK: cmplt p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -822,8 +905,9 @@ define <vscale x 8 x i1> @cmplt_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmplt_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmplt_wide_s:
-; CHECK: cmplt p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmplt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -836,8 +920,9 @@ define <vscale x 4 x i1> @cmplt_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpne_b:
-; CHECK: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -846,8 +931,9 @@ define <vscale x 16 x i1> @cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i1> @cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpne_h:
-; CHECK: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -856,8 +942,9 @@ define <vscale x 8 x i1> @cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i1> @cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpne_s:
-; CHECK: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -866,8 +953,9 @@ define <vscale x 4 x i1> @cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i1> @cmpne_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpne_d:
-; CHECK: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -876,8 +964,9 @@ define <vscale x 2 x i1> @cmpne_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i1> @cmpne_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpne_wide_b:
-; CHECK: cmpne p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
                                                                       <vscale x 2 x i64> %b)
@@ -886,8 +975,9 @@ define <vscale x 16 x i1> @cmpne_wide_b(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i1> @cmpne_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpne_wide_h:
-; CHECK: cmpne p0.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x i16> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -896,8 +986,9 @@ define <vscale x 8 x i1> @cmpne_wide_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i1> @cmpne_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpne_wide_s:
-; CHECK: cmpne p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b)
@@ -906,36 +997,40 @@ define <vscale x 4 x i1> @cmpne_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 16 x i1> @cmpne_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmpne_ir_b:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = icmp ne <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @cmpne_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cmpne_ir_h:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = icmp ne <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @cmpne_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpne_ir_s:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = icmp ne <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @cmpne_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpne_ir_d:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = icmp ne <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i1> %out
 }
@@ -943,8 +1038,10 @@ define <vscale x 2 x i1> @cmpne_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i1> @cmpgt_wide_splat_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, i64 %b) {
 ; CHECK-LABEL: cmpgt_wide_splat_b:
-; CHECK: cmpgt p0.b, p0/z, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT:    ret
   %splat = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                       <vscale x 16 x i8> %a,
@@ -954,8 +1051,10 @@ define <vscale x 16 x i1> @cmpgt_wide_splat_b(<vscale x 16 x i1> %pg, <vscale x
 
 define <vscale x 4 x i1> @cmpls_wide_splat_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, i64 %b) {
 ; CHECK-LABEL: cmpls_wide_splat_s:
-; CHECK: cmpls p0.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    cmpls p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT:    ret
   %splat = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x i32> %a,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
index e3fccea179e6a..74b94ef9ad17f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @ld1b_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_upper_bound:
-; CHECK: ld1b { z0.b }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @ld1b_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
 
 define <vscale x 16 x i8> @ld1b_inbound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_inbound:
-; CHECK: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -28,8 +31,9 @@ define <vscale x 16 x i8> @ld1b_inbound(<vscale x 16 x i1> %pg, i8* %a) {
 
 define <vscale x 4 x i32> @ld1b_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_s_inbound:
-; CHECK: ld1b { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
   %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
@@ -40,8 +44,9 @@ define <vscale x 4 x i32> @ld1b_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
 
 define <vscale x 4 x i32> @ld1sb_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1sb_s_inbound:
-; CHECK: ld1sb { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
   %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
@@ -52,8 +57,9 @@ define <vscale x 4 x i32> @ld1sb_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
 
 define <vscale x 16 x i8> @ld1b_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_lower_bound:
-; CHECK: ld1b { z0.b }, p0/z, [x0, #-8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #-8, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -8
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -63,9 +69,10 @@ define <vscale x 16 x i8> @ld1b_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
 
 define <vscale x 16 x i8> @ld1b_out_of_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_out_of_upper_bound:
-; CHECK: rdvl x[[OFFSET:[0-9]+]], #8
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #8
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 8
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -75,9 +82,10 @@ define <vscale x 16 x i8> @ld1b_out_of_upper_bound(<vscale x 16 x i1> %pg, i8* %
 
 define <vscale x 16 x i8> @ld1b_out_of_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_out_of_lower_bound:
-; CHECK: rdvl x[[OFFSET:[0-9]+]], #-9
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #-9
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -9
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -91,8 +99,9 @@ define <vscale x 16 x i8> @ld1b_out_of_lower_bound(<vscale x 16 x i1> %pg, i8* %
 
 define <vscale x 8 x i16> @ld1b_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_h_inbound:
-; CHECK: ld1b { z0.h }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
   %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
@@ -103,8 +112,9 @@ define <vscale x 8 x i16> @ld1b_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
 
 define <vscale x 8 x i16> @ld1sb_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1sb_h_inbound:
-; CHECK: ld1sb { z0.h }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
   %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
@@ -115,8 +125,9 @@ define <vscale x 8 x i16> @ld1sb_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
 
 define <vscale x 8 x i16> @ld1h_inbound(<vscale x 8 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1h_inbound:
-; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 8 x i16>*
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 8 x i16>* %base to i16*
@@ -126,8 +137,9 @@ define <vscale x 8 x i16> @ld1h_inbound(<vscale x 8 x i1> %pg, i16* %a) {
 
 define <vscale x 4 x i32> @ld1h_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1h_s_inbound:
-; CHECK: ld1h { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
   %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
@@ -138,8 +150,9 @@ define <vscale x 4 x i32> @ld1h_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
 
 define <vscale x 4 x i32> @ld1sh_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1sh_s_inbound:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
   %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
@@ -150,8 +163,9 @@ define <vscale x 4 x i32> @ld1sh_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
 
 define <vscale x 2 x i64> @ld1b_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1b_d_inbound:
-; CHECK: ld1b { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
   %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
@@ -162,8 +176,9 @@ define <vscale x 2 x i64> @ld1b_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
 
 define <vscale x 2 x i64> @ld1sb_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1sb_d_inbound:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
   %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
@@ -174,8 +189,9 @@ define <vscale x 2 x i64> @ld1sb_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
 
 define <vscale x 2 x i64> @ld1h_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1h_d_inbound:
-; CHECK: ld1h { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
   %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
@@ -186,8 +202,9 @@ define <vscale x 2 x i64> @ld1h_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
 
 define <vscale x 2 x i64> @ld1sh_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1sh_d_inbound:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
   %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
@@ -198,8 +215,9 @@ define <vscale x 2 x i64> @ld1sh_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
 
 define <vscale x 8 x half> @ld1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: ld1h_f16_inbound:
-; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast half* %a to <vscale x 8 x half>*
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 8 x half>* %base to half*
@@ -209,8 +227,9 @@ define <vscale x 8 x half> @ld1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
 
 define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ld1h_bf16_inbound:
-; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
   %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
@@ -224,8 +243,9 @@ define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %
 
 define <vscale x 4 x i32> @ld1w_inbound(<vscale x 4 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ld1w_inbound:
-; CHECK: ld1w { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i32* %a to <vscale x 4 x i32>*
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i32>* %base to i32*
@@ -235,8 +255,9 @@ define <vscale x 4 x i32> @ld1w_inbound(<vscale x 4 x i1> %pg, i32* %a) {
 
 define <vscale x 4 x float> @ld1w_f32_inbound(<vscale x 4 x i1> %pg, float* %a) {
 ; CHECK-LABEL: ld1w_f32_inbound:
-; CHECK: ld1w { z0.s }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast float* %a to <vscale x 4 x float>*
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x float>* %base to float*
@@ -250,8 +271,9 @@ define <vscale x 4 x float> @ld1w_f32_inbound(<vscale x 4 x i1> %pg, float* %a)
 
 define <vscale x 2 x i64> @ld1d_inbound(<vscale x 2 x i1> %pg, i64* %a) {
 ; CHECK-LABEL: ld1d_inbound:
-; CHECK: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i64* %a to <vscale x 2 x i64>*
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 2 x i64>* %base to i64*
@@ -261,8 +283,9 @@ define <vscale x 2 x i64> @ld1d_inbound(<vscale x 2 x i1> %pg, i64* %a) {
 
 define <vscale x 2 x i64> @ld1w_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ld1w_d_inbound:
-; CHECK: ld1w { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
   %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
@@ -273,8 +296,9 @@ define <vscale x 2 x i64> @ld1w_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
 
 define <vscale x 2 x i64> @ld1sw_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ld1sw_d_inbound:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
   %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
@@ -285,8 +309,9 @@ define <vscale x 2 x i64> @ld1sw_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
 
 define <vscale x 2 x double> @ld1d_f64_inbound(<vscale x 2 x i1> %pg, double* %a) {
 ; CHECK-LABEL: ld1d_f64_inbound:
-; CHECK: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast double* %a to <vscale x 2 x double>*
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 2 x double>* %base to double*

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
index a47da1c004ca4..68dd37185317f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -5,9 +6,10 @@
 ;
 
 define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) {
-; CHECK-LABEL: ld1b_i8
-; CHECK: ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1b_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
@@ -15,8 +17,9 @@ define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) {
 
 define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1b_h:
-; CHECK: ld1b { z0.h }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %base)
   %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
@@ -25,8 +28,9 @@ define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index) {
 
 define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1sb_h:
-; CHECK: ld1sb { z0.h }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %base)
   %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
@@ -35,8 +39,9 @@ define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, i8* %a, i64 %index)
 
 define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1b_s:
-; CHECK: ld1b { z0.s }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %base)
   %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
@@ -45,8 +50,9 @@ define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index) {
 
 define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1sb_s:
-; CHECK: ld1sb { z0.s }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %base)
   %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
@@ -55,8 +61,9 @@ define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, i8* %a, i64 %index)
 
 define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1b_d:
-; CHECK: ld1b { z0.d }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %base)
   %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
@@ -65,8 +72,9 @@ define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index) {
 
 define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1sb_d:
-; CHECK: ld1sb { z0.d }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %base)
   %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
@@ -78,27 +86,30 @@ define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, i8* %a, i64 %index)
 ;
 
 define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pg, i16* %a, i64 %index) {
-; CHECK-LABEL: ld1h_i16
-; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1h_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
   ret <vscale x 8 x i16> %load
 }
 
 define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pg, half* %a, i64 %index) {
-; CHECK-LABEL: ld1h_f16
-; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1h_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr half, half* %a, i64 %index
   %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
   ret <vscale x 8 x half> %load
 }
 
 define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) #0 {
-; CHECK-LABEL: ld1h_bf16
-; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1h_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr bfloat, bfloat* %a, i64 %index
   %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
   ret <vscale x 8 x bfloat> %load
@@ -106,8 +117,9 @@ define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %
 
 define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: ld1h_s:
-; CHECK: ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %base)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
@@ -116,8 +128,9 @@ define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index)
 
 define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: ld1sh_s:
-; CHECK: ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %base)
   %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
@@ -126,8 +139,9 @@ define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, i16* %a, i64 %index)
 
 define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: ld1h_d:
-; CHECK: ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %base)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
@@ -136,8 +150,9 @@ define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, i16* %a, i64 %index)
 
 define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: ld1sh_d:
-; CHECK: ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %base)
   %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
@@ -168,8 +183,9 @@ define<vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pg, float* %a, i64 %inde
 
 define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index) {
 ; CHECK-LABEL: ld1w_d:
-; CHECK: ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %index
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %base)
   %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
@@ -178,8 +194,9 @@ define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index)
 
 define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index) {
 ; CHECK-LABEL: ld1sw_d:
-; CHECK: ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %index
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %base)
   %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
@@ -191,18 +208,20 @@ define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, i32* %a, i64 %index)
 ;
 
 define <vscale x 2 x i64> @ld1d(<vscale x 2 x i1> %pg, i64* %a, i64 %index) {
-; CHECK-LABEL: ld1d
-; CHECK: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 %index
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
   ret <vscale x 2 x i64> %load
 }
 
 define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pg, double* %a, i64 %index) {
-; CHECK-LABEL: ld1d_f64
-; CHECK: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK-LABEL: ld1d_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr double, double* %a, i64 %index
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
   ret <vscale x 2 x double> %load

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
index 69f20fa5c13e3..c2e43c830c2a1 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
@@ -7,16 +8,18 @@
 
 define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1b_i8:
-; CHECK: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1b_h:
-; CHECK: ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %addr)
   %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %res
@@ -24,8 +27,9 @@ define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, i8* %addr) {
 
 define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1sb_h:
-; CHECK: ld1sb { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, i8* %addr)
   %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %res
@@ -33,8 +37,9 @@ define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, i8* %addr) {
 
 define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1b_s:
-; CHECK: ld1b { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %addr)
   %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -42,8 +47,9 @@ define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, i8* %addr) {
 
 define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1sb_s:
-; CHECK: ld1sb { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, i8* %addr)
   %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -51,8 +57,9 @@ define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, i8* %addr) {
 
 define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1b_d:
-; CHECK: ld1b { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %addr)
   %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -60,8 +67,9 @@ define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, i8* %addr) {
 
 define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1sb_d:
-; CHECK: ld1sb { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, i8* %addr)
   %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -73,32 +81,36 @@ define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, i8* %addr) {
 
 define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1h_i16:
-; CHECK: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld1h_f16:
-; CHECK: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 8 x half> %res
 }
 
 define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: ld1h_bf16:
-; CHECK: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
   ret <vscale x 8 x bfloat> %res
 }
 
 define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1h_s:
-; CHECK: ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %addr)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -106,8 +118,9 @@ define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, i16* %addr) {
 
 define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1sh_s:
-; CHECK: ld1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, i16* %addr)
   %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -115,8 +128,9 @@ define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, i16* %addr) {
 
 define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1h_d:
-; CHECK: ld1h { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %addr)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -124,8 +138,9 @@ define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, i16* %addr) {
 
 define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1sh_d:
-; CHECK: ld1sh { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, i16* %addr)
   %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -137,24 +152,27 @@ define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, i16* %addr) {
 
 define <vscale x 4 x i32> @ld1w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld1w_i32:
-; CHECK: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld1w_f32:
-; CHECK: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 4 x float> %res
 }
 
 define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld1w_d:
-; CHECK: ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %addr)
   %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -162,8 +180,9 @@ define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, i32* %addr) {
 
 define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld1sw_d:
-; CHECK: ld1sw { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, i32* %addr)
   %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -175,8 +194,9 @@ define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, i32* %addr) {
 
 define <vscale x 2 x i64> @ld1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld1d_i64:
-; CHECK: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pred,
                                                                i64* %addr)
   ret <vscale x 2 x i64> %res
@@ -184,8 +204,9 @@ define <vscale x 2 x i64> @ld1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 
 define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld1d_f64:
-; CHECK: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pred,
                                                                   double* %addr)
   ret <vscale x 2 x double> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
index 70f34cf6d331d..f5c4fbed0293f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
@@ -1,12 +1,15 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s
+
 ;
 ; LD1ROB
 ;
 
-define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1rob_i8:
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0, #32]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0, #32]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 32
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
@@ -16,28 +19,31 @@ define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a) nounwind {
 ; LD1ROH
 ;
 
-define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a) nounwind {
+define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1roh_i16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, #64]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 32
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
   ret <vscale x 8 x i16> %load
 }
 
-define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a) nounwind {
+define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: ld1roh_f16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, #64]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT:    ret
   %base = getelementptr half, half* %a, i64 32
   %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a) nounwind #0 {
+define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ld1roh_bf16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, #64]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT:    ret
   %base = getelementptr bfloat, bfloat* %a, i64 32
   %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
   ret <vscale x 8 x bfloat> %load
@@ -47,19 +53,21 @@ define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a) nou
 ; LD1ROW
 ;
 
-define<vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a) nounwind {
+define <vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ld1row_i32:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, #128]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0, #128]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 32
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
   ret <vscale x 4 x i32> %load
 }
 
-define<vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a) nounwind {
+define <vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a) {
 ; CHECK-LABEL: ld1row_f32:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, #128]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0, #128]
+; CHECK-NEXT:    ret
   %base = getelementptr float, float* %a, i64 32
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
   ret <vscale x 4 x float> %load
@@ -69,19 +77,21 @@ define<vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a) nounwin
 ; LD1ROD
 ;
 
-define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a) nounwind {
+define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a) {
 ; CHECK-LABEL: ld1rod_i64:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0, #-64]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0, #-64]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 -8
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
   ret <vscale x 2 x i64> %load
 }
 
-define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a) nounwind {
+define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a) {
 ; CHECK-LABEL: ld1rod_f64:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0, #-128]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0, #-128]
+; CHECK-NEXT:    ret
   %base = getelementptr double, double* %a, i64 -16
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
   ret <vscale x 2 x double> %load
@@ -92,62 +102,68 @@ define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a) noun
 ; range checks: immediate must be a multiple of 32 in the range -256, ..., 224
 
 ; lower bound
-define <vscale x 16 x i8> @ld1rob_i8_lower_bound(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+define <vscale x 16 x i8> @ld1rob_i8_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1rob_i8_lower_bound:
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0, #-256]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0, #-256]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 -256
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
 }
 
 ; below lower bound
-define <vscale x 8 x i16> @ld1roh_i16_below_lower_bound(<vscale x 8 x i1> %pg, i16* %a) nounwind {
+define <vscale x 8 x i16> @ld1roh_i16_below_lower_bound(<vscale x 8 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ld1roh_i16_below_lower_bound:
-; CHECK-NEXT:  mov     x[[IDX:[0-9]+]], #-129
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, x[[IDX]], lsl #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-129
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 -129
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
   ret <vscale x 8 x i16> %load
 }
 
-define <vscale x 16 x i8> @ld1rob_i8_below_lower_bound_01(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+define <vscale x 16 x i8> @ld1rob_i8_below_lower_bound_01(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1rob_i8_below_lower_bound_01:
-; CHECK-NEXT:  mov x[[OFFSET:[0-9]+]], #-257
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-257
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 -257
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
 }
 
 ; not a multiple of 32
-define<vscale x 4 x i32> @ld1row_i32_not_multiple(<vscale x 4 x i1> %pg, i32* %a) nounwind {
+define <vscale x 4 x i32> @ld1row_i32_not_multiple(<vscale x 4 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ld1row_i32_not_multiple:
-; CHECK-NEXT:  mov x[[IDX:[0-9]+]], #3
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, x[[IDX]], lsl #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #3
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 3
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
   ret <vscale x 4 x i32> %load
 }
 
 ; upper bound
-define <vscale x 2 x i64> @ld1rod_i64_upper_bound(<vscale x 2 x i1> %pg, i64* %a) nounwind {
+define <vscale x 2 x i64> @ld1rod_i64_upper_bound(<vscale x 2 x i1> %pg, i64* %a) {
 ; CHECK-LABEL: ld1rod_i64_upper_bound:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0, #224]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0, #224]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 28
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
   ret <vscale x 2 x i64> %load
 }
 
-define <vscale x 16 x i8> @ld1rob_i8_beyond_upper_bound(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+define <vscale x 16 x i8> @ld1rob_i8_beyond_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ld1rob_i8_beyond_upper_bound:
-; CHECK-NEXT:  mov w[[OFFSET:[0-9]+]], #225
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #225
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 225
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
index b4ac587c0b794..603488cb59fa9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm,+bf16 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s
 
 ;
 ; LD1ROB
 ;
 
-define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) nounwind {
+define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) {
 ; CHECK-LABEL: ld1rob_i8:
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
@@ -17,29 +19,31 @@ define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index)
 ; LD1ROH
 ;
 
-define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a, i64 %index) nounwind {
+define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a, i64 %index) {
 ; CHECK-LABEL: ld1roh_i16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
   ret <vscale x 8 x i16> %load
 }
 
-define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a, i64 %index) nounwind {
+define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a, i64 %index) {
 ; CHECK-LABEL: ld1roh_f16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr half, half* %a, i64 %index
   %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
   ret <vscale x 8 x half> %load
 }
 
-; bfloat - requires -mattr=+bf16
-define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) nounwind {
+define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) #0 {
 ; CHECK-LABEL: ld1roh_bf16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr bfloat, bfloat* %a, i64 %index
   %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
   ret <vscale x 8 x bfloat> %load
@@ -49,19 +53,21 @@ define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64
 ; LD1ROW
 ;
 
-define<vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a, i64 %index) nounwind {
+define <vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a, i64 %index) {
 ; CHECK-LABEL: ld1row_i32:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %index
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
   ret <vscale x 4 x i32> %load
 }
 
-define<vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a, i64 %index) nounwind {
+define <vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a, i64 %index) {
 ; CHECK-LABEL: ld1row_f32:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr float, float* %a, i64 %index
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
   ret <vscale x 4 x float> %load
@@ -71,19 +77,21 @@ define<vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a, i64 %in
 ; LD1ROD
 ;
 
-define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a, i64 %index) nounwind {
+define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a, i64 %index) {
 ; CHECK-LABEL: ld1rod_i64:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 %index
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
   ret <vscale x 2 x i64> %load
 }
 
-define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a, i64 %index) nounwind {
+define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a, i64 %index) {
 ; CHECK-LABEL: ld1rod_f64:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr double, double* %a, i64 %index
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
   ret <vscale x 2 x double> %load
@@ -100,3 +108,6 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1>,
 
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+f64mm,+bf16" }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll
index 50b7c003f4d74..8c180d6367459 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s
 
 ;
 ; LD1ROB
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pred, i8* %addr) nounwind {
 ; CHECK-LABEL: ld1rob_i8:
-; CHECK-NEXT:  ld1rob { z0.b }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rob { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 16 x i8> %res
 }
@@ -18,16 +20,18 @@ define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pred, i8* %addr) nounwi
 
 define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pred, i16* %addr) nounwind {
 ; CHECK-LABEL: ld1roh_i16:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 8 x half> @ld1roh_half(<vscale x 8 x i1> %pred, half* %addr) nounwind {
 ; CHECK-LABEL: ld1roh_half:
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1roh { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 8 x half> %res
 }
@@ -38,16 +42,18 @@ define <vscale x 8 x half> @ld1roh_half(<vscale x 8 x i1> %pred, half* %addr) no
 
 define <vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pred, i32* %addr) nounwind {
 ; CHECK-LABEL: ld1row_i32:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 4 x float> @ld1row_float(<vscale x 4 x i1> %pred, float* %addr) nounwind {
 ; CHECK-LABEL: ld1row_float:
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1row { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 4 x float> %res
 }
@@ -58,16 +64,18 @@ define <vscale x 4 x float> @ld1row_float(<vscale x 4 x i1> %pred, float* %addr)
 
 define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pred, i64* %addr) nounwind {
 ; CHECK-LABEL: ld1rod_i64:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pred, i64* %addr)
   ret <vscale x 2 x i64> %res
 }
 
 define <vscale x 2 x double> @ld1rod_double(<vscale x 2 x i1> %pred, double* %addr) nounwind {
 ; CHECK-LABEL: ld1rod_double:
-; CHECK-NEXT:  ld1rod { z0.d }, p0/z, [x0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rod { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pred, double* %addr)
   ret <vscale x 2 x double> %res
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll
index 0e8cf3d842ce2..18e552e5f4a3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s
 
@@ -10,8 +11,9 @@
 ; ld2b
 define <vscale x 32 x i8> @ld2.nxv32i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8:
-; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8*
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -20,8 +22,9 @@ ret <vscale x 32 x i8> %res
 
 define <vscale x 32 x i8> @ld2.nxv32i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8_lower_bound:
-; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0, #-16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, #-16, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -30,8 +33,9 @@ ret <vscale x 32 x i8> %res
 
 define <vscale x 32 x i8> @ld2.nxv32i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8_upper_bound:
-; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0, #14, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, #14, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -40,9 +44,10 @@ ret <vscale x 32 x i8> %res
 
 define <vscale x 32 x i8> @ld2.nxv32i8_not_multiple_of_2(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8_not_multiple_of_2:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #3
-; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #3
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -51,9 +56,10 @@ ret <vscale x 32 x i8> %res
 
 define <vscale x 32 x i8> @ld2.nxv32i8_outside_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8_outside_lower_bound:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #-18
-; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #-18
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -62,9 +68,10 @@ ret <vscale x 32 x i8> %res
 
 define <vscale x 32 x i8> @ld2.nxv32i8_outside_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld2.nxv32i8_outside_upper_bound:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #16
-; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #16
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -74,8 +81,9 @@ ret <vscale x 32 x i8> %res
 ; ld2h
 define <vscale x 16 x i16> @ld2.nxv16i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16>* %addr) {
 ; CHECK-LABEL: ld2.nxv16i16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, #14, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, #14, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 14
 %base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
 %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%base_ptr)
@@ -84,8 +92,9 @@ ret <vscale x 16 x i16> %res
 
 define <vscale x 16 x half> @ld2.nxv16f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half>* %addr) {
 ; CHECK-LABEL: ld2.nxv16f16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, #-16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, #-16, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 -16
 %base_ptr = bitcast <vscale x 8 x half>* %base to half *
 %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%base_ptr)
@@ -94,8 +103,9 @@ ret <vscale x 16 x half> %res
 
 define <vscale x 16 x bfloat> @ld2.nxv16bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat>* %addr) #0 {
 ; CHECK-LABEL: ld2.nxv16bf16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, #12, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, #12, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 12
 %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
 %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%base_ptr)
@@ -105,8 +115,9 @@ ret <vscale x 16 x bfloat> %res
 ; ld2w
 define <vscale x 8 x i32> @ld2.nxv8i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32>* %addr) {
 ; CHECK-LABEL: ld2.nxv8i32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0, #14, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0, #14, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 14
 %base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
 %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%base_ptr)
@@ -115,8 +126,9 @@ ret <vscale x 8 x i32> %res
 
 define <vscale x 8 x float> @ld2.nxv8f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float>* %addr) {
 ; CHECK-LABEL: ld2.nxv8f32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0, #-16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0, #-16, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -16
 %base_ptr = bitcast <vscale x 4 x float>* %base to float *
 %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%base_ptr)
@@ -126,8 +138,9 @@ ret <vscale x 8 x float> %res
 ; ld2d
 define <vscale x 4 x i64> @ld2.nxv4i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64>* %addr) {
 ; CHECK-LABEL: ld2.nxv4i64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0, #14, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0, #14, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 14
 %base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
 %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%base_ptr)
@@ -136,8 +149,9 @@ ret <vscale x 4 x i64> %res
 
 define <vscale x 4 x double> @ld2.nxv4f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double>* %addr) {
 ; CHECK-LABEL: ld2.nxv4f64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0, #-16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0, #-16, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -16
 %base_ptr = bitcast <vscale x 2 x double>* %base to double *
 %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double *%base_ptr)
@@ -147,8 +161,9 @@ ret <vscale x 4 x double> %res
 ; ld3b
 define <vscale x 48 x i8> @ld3.nxv48i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8:
-; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #3, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -157,8 +172,9 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_lower_bound:
-; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #-24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #-24, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -167,8 +183,9 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_upper_bound:
-; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -177,9 +194,10 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_not_multiple_of_3_01(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_01:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #4
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #4
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -188,9 +206,10 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_not_multiple_of_3_02(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_02:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #5
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -199,9 +218,10 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_outside_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_outside_lower_bound:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #-27
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #-27
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -210,9 +230,10 @@ ret <vscale x 48 x i8> %res
 
 define <vscale x 48 x i8> @ld3.nxv48i8_outside_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld3.nxv48i8_outside_upper_bound:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #24
-; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #24
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -222,8 +243,9 @@ ret <vscale x 48 x i8> %res
 ; ld3h
 define <vscale x 24 x i16> @ld3.nxv24i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16> *%addr) {
 ; CHECK-LABEL: ld3.nxv24i16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 21
 %base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
 %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%base_ptr)
@@ -232,8 +254,9 @@ ret <vscale x 24 x i16> %res
 
 define <vscale x 24 x half> @ld3.nxv24f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half> *%addr) {
 ; CHECK-LABEL: ld3.nxv24f16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 21
 %base_ptr = bitcast <vscale x 8 x half>* %base to half *
 %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%base_ptr)
@@ -242,8 +265,9 @@ ret <vscale x 24 x half> %res
 
 define <vscale x 24 x bfloat> @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat> *%addr) #0 {
 ; CHECK-LABEL: ld3.nxv24bf16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #-24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #-24, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -24
 %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
 %res = call <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%base_ptr)
@@ -253,8 +277,9 @@ ret <vscale x 24 x bfloat> %res
 ; ld3w
 define <vscale x 12 x i32> @ld3.nxv12i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> *%addr) {
 ; CHECK-LABEL: ld3.nxv12i32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 21
 %base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
 %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%base_ptr)
@@ -263,8 +288,9 @@ ret <vscale x 12 x i32> %res
 
 define <vscale x 12 x float> @ld3.nxv12f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float> *%addr) {
 ; CHECK-LABEL: ld3.nxv12f32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #-24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #-24, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -24
 %base_ptr = bitcast <vscale x 4 x float>* %base to float *
 %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%base_ptr)
@@ -274,8 +300,9 @@ ret <vscale x 12 x float> %res
 ; ld3d
 define <vscale x 6 x i64> @ld3.nxv6i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> *%addr) {
 ; CHECK-LABEL: ld3.nxv6i64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 21
 %base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
 %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%base_ptr)
@@ -284,8 +311,9 @@ ret <vscale x 6 x i64> %res
 
 define <vscale x 6 x double> @ld3.nxv6f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double> *%addr) {
 ; CHECK-LABEL: ld3.nxv6f64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #-24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #-24, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -24
 %base_ptr = bitcast <vscale x 2 x double>* %base to double *
 %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double *%base_ptr)
@@ -295,8 +323,9 @@ ret <vscale x 6 x double> %res
 ; ; ld4b
 define <vscale x 64 x i8> @ld4.nxv64i8(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8:
-; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #4, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -305,8 +334,9 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_lower_bound:
-; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #-32, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -315,8 +345,9 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_upper_bound:
-; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -325,9 +356,10 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_not_multiple_of_4_01(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_01:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #5
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -336,9 +368,10 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_not_multiple_of_4_02(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_02:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #6
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #6
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -347,9 +380,10 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_not_multiple_of_4_03(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_03:
-; CHECK:      rdvl x[[OFFSET:[0-9]]], #7
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #7
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
@@ -358,16 +392,17 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_outside_lower_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_outside_lower_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x9, #-576
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
 ; xM = -9 * 2^6
-; xP = RDVL * 2^-4 
+; xP = RDVL * 2^-4
 ; xOFFSET = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36
-; CHECK:      rdvl x[[N:[0-9]]], #1
-; CHECK-DAG:  mov  x[[M:[0-9]]], #-576
-; CHECK-DAG:  lsr  x[[P:[0-9]]], x[[N]], #4
-; CHECK-DAG:  mul  x[[OFFSET:[0-9]]], x[[P]], x[[M]]
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
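The FIXME above is easiest to follow with the arithmetic written out. For a getelementptr index of -36 on <vscale x 16 x i8>, the byte offset is -36 * (16 * vscale), and the generated sequence computes exactly that. A rough sketch, where the per-instruction annotations assume the usual SVE semantics of rdvl (vector length in bytes times the immediate) and are not part of the patch:

   rdvl x8, #1      // x8 = 16 * vscale (one vector length in bytes)
   mov  x9, #-576   // x9 = -36 * 16
   lsr  x8, x8, #4  // x8 = vscale
   mul  x8, x8, x9  // x8 = -576 * vscale = -36 * (16 * vscale)

The form suggested by the FIXME, a multiply of RDVL #4 (= 64 * vscale) by -9, would reach the same -576 * vscale without the shift.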
@@ -376,16 +411,17 @@ ret <vscale x 64 x i8> %res
 
 define <vscale x 64 x i8> @ld4.nxv64i8_outside_upper_bound(<vscale x 16 x i1> %Pg, <vscale x 16 x i8> *%addr) {
 ; CHECK-LABEL: ld4.nxv64i8_outside_upper_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov w9, #512
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8]
+; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2)
 ; xM = 2^9
 ; xP = RDVL * 2^-4
 ; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32
-; CHECK:      rdvl x[[N:[0-9]]], #1
-; CHECK-DAG:  mov  w[[M:[0-9]]], #512
-; CHECK-DAG:  lsr  x[[P:[0-9]]], x[[N]], #4
-; CHECK-DAG:  mul  x[[OFFSET:[0-9]]], x[[P]], x[[M]]
-; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
 %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32
 %base_ptr = bitcast <vscale x 16 x i8>* %base to i8 *
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%base_ptr)
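The out-of-range positive case follows the same pattern: a getelementptr index of 32 gives a byte offset of 32 * (16 * vscale) = 512 * vscale, which the generated code computes as (same assumptions about rdvl as in the sketch above):

   rdvl x8, #1      // x8 = 16 * vscale
   mov  w9, #512    // w9 = 32 * 16
   lsr  x8, x8, #4  // x8 = vscale
   mul  x8, x8, x9  // x8 = 512 * vscale = 32 * (16 * vscale)

Here the FIXME's (mul (RDVL #16) #2) would give the identical 512 * vscale (256 * vscale * 2) without the shift.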
@@ -395,8 +431,9 @@ ret <vscale x 64 x i8> %res
 ; ld4h
 define <vscale x 32 x i16> @ld4.nxv32i16(<vscale x 8 x i1> %Pg, <vscale x 8 x i16> *%addr) {
 ; CHECK-LABEL: ld4.nxv32i16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #8, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8
 %base_ptr = bitcast <vscale x 8 x i16>* %base to i16 *
 %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%base_ptr)
@@ -405,8 +442,9 @@ ret <vscale x 32 x i16> %res
 
 define <vscale x 32 x half> @ld4.nxv32f16(<vscale x 8 x i1> %Pg, <vscale x 8 x half> *%addr) {
 ; CHECK-LABEL: ld4.nxv32f16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 28
 %base_ptr = bitcast <vscale x 8 x half>* %base to half *
 %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%base_ptr)
@@ -415,8 +453,9 @@ ret <vscale x 32 x half> %res
 
 define <vscale x 32 x bfloat> @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, <vscale x 8 x bfloat> *%addr) #0 {
 ; CHECK-LABEL: ld4.nxv32bf16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #-32, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %addr, i64 -32
 %base_ptr = bitcast <vscale x 8 x bfloat>* %base to bfloat *
 %res = call <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%base_ptr)
@@ -426,8 +465,9 @@ ret <vscale x 32 x bfloat> %res
 ; ld4w
 define <vscale x 16 x i32> @ld4.nxv16i32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> *%addr) {
 ; CHECK-LABEL: ld4.nxv16i32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 28
 %base_ptr = bitcast <vscale x 4 x i32>* %base to i32 *
 %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%base_ptr)
@@ -436,8 +476,9 @@ ret <vscale x 16 x i32> %res
 
 define <vscale x 16 x float> @ld4.nxv16f32(<vscale x 4 x i1> %Pg, <vscale x 4 x float>* %addr) {
 ; CHECK-LABEL: ld4.nxv16f32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #-32, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 -32
 %base_ptr = bitcast <vscale x 4 x float>* %base to float *
 %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%base_ptr)
@@ -447,8 +488,9 @@ ret <vscale x 16 x float> %res
 ; ld4d
 define <vscale x 8 x i64> @ld4.nxv8i64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> *%addr) {
 ; CHECK-LABEL: ld4.nxv8i64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 28
 %base_ptr = bitcast <vscale x 2 x i64>* %base to i64 *
 %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%base_ptr)
@@ -457,8 +499,9 @@ ret <vscale x 8 x i64> %res
 
 define <vscale x 8 x double> @ld4.nxv8f64(<vscale x 2 x i1> %Pg, <vscale x 2 x double> *%addr) {
 ; CHECK-LABEL: ld4.nxv8f64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #-32, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
 %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -32
 %base_ptr = bitcast <vscale x 2 x double>* %base to double *
 %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double * %base_ptr)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll
index dd2c71befccd2..bf207c66d5332 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll
@@ -1,11 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s
 
 ; ld2b
 define <vscale x 32 x i8> @ld2.nxv32i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv32i8:
-; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i8, i8 *  %addr, i64 %a
 %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
 ret <vscale x 32 x i8> %res
@@ -14,8 +16,9 @@ ret <vscale x 32 x i8> %res
 ; ld2h
 define <vscale x 16 x i16> @ld2.nxv16i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv16i16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i16, i16 *  %addr, i64 %a
 %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
 ret <vscale x 16 x i16> %res
@@ -23,8 +26,9 @@ ret <vscale x 16 x i16> %res
 
 define <vscale x 16 x half> @ld2.nxv16f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv16f16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr half, half *  %addr, i64 %a
 %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%addr2)
 ret <vscale x 16 x half> %res
@@ -32,8 +36,9 @@ ret <vscale x 16 x half> %res
 
 define <vscale x 16 x bfloat> @ld2.nxv16bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
 ; CHECK-LABEL: ld2.nxv16bf16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr bfloat, bfloat *  %addr, i64 %a
 %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
 ret <vscale x 16 x bfloat> %res
@@ -42,8 +47,9 @@ ret <vscale x 16 x bfloat> %res
 ; ld2w
 define <vscale x 8 x i32> @ld2.nxv8i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv8i32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i32, i32 *  %addr, i64 %a
 %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
 ret <vscale x 8 x i32> %res
@@ -51,8 +57,9 @@ ret <vscale x 8 x i32> %res
 
 define <vscale x 8 x float> @ld2.nxv8f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv8f32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr float, float *  %addr, i64 %a
 %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%addr2)
 ret <vscale x 8 x float> %res
@@ -61,8 +68,9 @@ ret <vscale x 8 x float> %res
 ; ld2d
 define <vscale x 4 x i64> @ld2.nxv4i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv4i64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i64, i64 *  %addr, i64 %a
 %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
 ret <vscale x 4 x i64> %res
@@ -70,8 +78,9 @@ ret <vscale x 4 x i64> %res
 
 define <vscale x 4 x double> @ld2.nxv4f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
 ; CHECK-LABEL: ld2.nxv4f64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr double, double *  %addr, i64 %a
 %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double *%addr2)
 ret <vscale x 4 x double> %res
@@ -80,8 +89,9 @@ ret <vscale x 4 x double> %res
 ; ld3b
 define <vscale x 48 x i8> @ld3.nxv48i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv48i8:
-; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i8, i8 *  %addr, i64 %a
 %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
 ret <vscale x 48 x i8> %res
@@ -90,8 +100,9 @@ ret <vscale x 48 x i8> %res
 ; ld3h
 define <vscale x 24 x i16> @ld3.nxv24i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv24i16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i16, i16 *  %addr, i64 %a
 %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
 ret <vscale x 24 x i16> %res
@@ -99,8 +110,9 @@ ret <vscale x 24 x i16> %res
 
 define <vscale x 24 x half> @ld3.nxv24f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv24f16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr half, half *  %addr, i64 %a
 %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%addr2)
 ret <vscale x 24 x half> %res
@@ -108,8 +120,9 @@ ret <vscale x 24 x half> %res
 
 define <vscale x 24 x bfloat> @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
 ; CHECK-LABEL: ld3.nxv24bf16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr bfloat, bfloat *  %addr, i64 %a
 %res = call <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
 ret <vscale x 24 x bfloat> %res
@@ -118,8 +131,9 @@ ret <vscale x 24 x bfloat> %res
 ; ld3w
 define <vscale x 12 x i32> @ld3.nxv12i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv12i32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i32, i32 *  %addr, i64 %a
 %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
 ret <vscale x 12 x i32> %res
@@ -127,8 +141,9 @@ ret <vscale x 12 x i32> %res
 
 define <vscale x 12 x float> @ld3.nxv12f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv12f32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr float, float *  %addr, i64 %a
 %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%addr2)
 ret <vscale x 12 x float> %res
@@ -137,8 +152,9 @@ ret <vscale x 12 x float> %res
 ; ld3d
 define <vscale x 6 x i64> @ld3.nxv6i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv6i64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i64, i64 *  %addr, i64 %a
 %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
 ret <vscale x 6 x i64> %res
@@ -146,8 +162,9 @@ ret <vscale x 6 x i64> %res
 
 define <vscale x 6 x double> @ld3.nxv6f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
 ; CHECK-LABEL: ld3.nxv6f64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr double, double *  %addr, i64 %a
 %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double *%addr2)
 ret <vscale x 6 x double> %res
@@ -156,8 +173,9 @@ ret <vscale x 6 x double> %res
 ; ld4b
 define <vscale x 64 x i8> @ld4.nxv64i8(<vscale x 16 x i1> %Pg, i8 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv64i8:
-; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i8, i8 *  %addr, i64 %a
 %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %Pg, i8 *%addr2)
 ret <vscale x 64 x i8> %res
@@ -166,8 +184,9 @@ ret <vscale x 64 x i8> %res
 ; ld4h
 define <vscale x 32 x i16> @ld4.nxv32i16(<vscale x 8 x i1> %Pg, i16 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv32i16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i16, i16 *  %addr, i64 %a
 %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1> %Pg, i16 *%addr2)
 ret <vscale x 32 x i16> %res
@@ -175,8 +194,9 @@ ret <vscale x 32 x i16> %res
 
 define <vscale x 32 x half> @ld4.nxv32f16(<vscale x 8 x i1> %Pg, half *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv32f16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr half, half *  %addr, i64 %a
 %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1> %Pg, half *%addr2)
 ret <vscale x 32 x half> %res
@@ -184,8 +204,9 @@ ret <vscale x 32 x half> %res
 
 define <vscale x 32 x bfloat> @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
 ; CHECK-LABEL: ld4.nxv32bf16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr bfloat, bfloat *  %addr, i64 %a
 %res = call <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %Pg, bfloat *%addr2)
 ret <vscale x 32 x bfloat> %res
@@ -194,8 +215,9 @@ ret <vscale x 32 x bfloat> %res
 ; ld4w
 define <vscale x 16 x i32> @ld4.nxv16i32(<vscale x 4 x i1> %Pg, i32 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv16i32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i32, i32 *  %addr, i64 %a
 %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1> %Pg, i32 *%addr2)
 ret <vscale x 16 x i32> %res
@@ -203,8 +225,9 @@ ret <vscale x 16 x i32> %res
 
 define <vscale x 16 x float> @ld4.nxv16f32(<vscale x 4 x i1> %Pg, float *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv16f32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr float, float *  %addr, i64 %a
 %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1> %Pg, float *%addr2)
 ret <vscale x 16 x float> %res
@@ -213,8 +236,9 @@ ret <vscale x 16 x float> %res
 ; ld4d
 define <vscale x 8 x i64> @ld4.nxv8i64(<vscale x 2 x i1> %Pg, i64 *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv8i64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr i64, i64 *  %addr, i64 %a
 %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1> %Pg, i64 *%addr2)
 ret <vscale x 8 x i64> %res
@@ -222,8 +246,9 @@ ret <vscale x 8 x i64> %res
 
 define <vscale x 8 x double> @ld4.nxv8f64(<vscale x 2 x i1> %Pg, double *%addr, i64 %a) {
 ; CHECK-LABEL: ld4.nxv8f64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
 %addr2 = getelementptr double, double *  %addr, i64 %a
 %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1> %Pg, double *%addr2)
 ret <vscale x 8 x double> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
index 96de8cc678024..1fcbc36f8bada 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,16 +7,18 @@
 
 define <vscale x 16 x i8> @ldff1b(<vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1b:
-; CHECK: ldff1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %a)
   ret <vscale x 16 x i8> %load
 }
 
 define <vscale x 16 x i8> @ldff1b_reg(<vscale x 16 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1b_reg:
-; CHECK: ldff1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
   ret <vscale x 16 x i8> %load
@@ -23,8 +26,9 @@ define <vscale x 16 x i8> @ldff1b_reg(<vscale x 16 x i1> %pg, i8* %a, i64 %offse
 
 define <vscale x 8 x i16> @ldff1b_h(<vscale x 8 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1b_h:
-; CHECK: ldff1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
   %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %res
@@ -32,8 +36,9 @@ define <vscale x 8 x i16> @ldff1b_h(<vscale x 8 x i1> %pg, i8* %a) {
 
 define <vscale x 8 x i16> @ldff1b_h_reg(<vscale x 8 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1b_h_reg:
-; CHECK: ldff1b { z0.h }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base)
   %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
@@ -42,8 +47,9 @@ define <vscale x 8 x i16> @ldff1b_h_reg(<vscale x 8 x i1> %pg, i8* %a, i64 %offs
 
 define <vscale x 4 x i32> @ldff1b_s(<vscale x 4 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1b_s:
-; CHECK: ldff1b { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
   %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -51,8 +57,9 @@ define <vscale x 4 x i32> @ldff1b_s(<vscale x 4 x i1> %pg, i8* %a) {
 
 define <vscale x 4 x i32> @ldff1b_s_reg(<vscale x 4 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1b_s_reg:
-; CHECK: ldff1b { z0.s }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base)
   %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
@@ -61,8 +68,9 @@ define <vscale x 4 x i32> @ldff1b_s_reg(<vscale x 4 x i1> %pg, i8* %a, i64 %offs
 
 define <vscale x 2 x i64> @ldff1b_d(<vscale x 2 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1b_d:
-; CHECK: ldff1b { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
   %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -70,8 +78,9 @@ define <vscale x 2 x i64> @ldff1b_d(<vscale x 2 x i1> %pg, i8* %a) {
 
 define <vscale x 2 x i64> @ldff1b_d_reg(<vscale x 2 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1b_d_reg:
-; CHECK: ldff1b { z0.d }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base)
   %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
@@ -84,8 +93,9 @@ define <vscale x 2 x i64> @ldff1b_d_reg(<vscale x 2 x i1> %pg, i8* %a, i64 %offs
 
 define <vscale x 8 x i16> @ldff1sb_h(<vscale x 8 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1sb_h:
-; CHECK: ldff1sb { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
   %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %res
@@ -93,8 +103,9 @@ define <vscale x 8 x i16> @ldff1sb_h(<vscale x 8 x i1> %pg, i8* %a) {
 
 define <vscale x 8 x i16> @ldff1sb_h_reg(<vscale x 8 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sb_h_reg:
-; CHECK: ldff1sb { z0.h }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base)
   %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
@@ -103,8 +114,9 @@ define <vscale x 8 x i16> @ldff1sb_h_reg(<vscale x 8 x i1> %pg, i8* %a, i64 %off
 
 define <vscale x 4 x i32> @ldff1sb_s(<vscale x 4 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1sb_s:
-; CHECK: ldff1sb { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
   %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -112,8 +124,9 @@ define <vscale x 4 x i32> @ldff1sb_s(<vscale x 4 x i1> %pg, i8* %a) {
 
 define <vscale x 4 x i32> @ldff1sb_s_reg(<vscale x 4 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sb_s_reg:
-; CHECK: ldff1sb { z0.s }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base)
   %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
@@ -122,8 +135,9 @@ define <vscale x 4 x i32> @ldff1sb_s_reg(<vscale x 4 x i1> %pg, i8* %a, i64 %off
 
 define <vscale x 2 x i64> @ldff1sb_d(<vscale x 2 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: ldff1sb_d:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
   %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -131,8 +145,9 @@ define <vscale x 2 x i64> @ldff1sb_d(<vscale x 2 x i1> %pg, i8* %a) {
 
 define <vscale x 2 x i64> @ldff1sb_d_reg(<vscale x 2 x i1> %pg, i8* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sb_d_reg:
-; CHECK: ldff1sb { z0.d }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %offset
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base)
   %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
@@ -145,16 +160,18 @@ define <vscale x 2 x i64> @ldff1sb_d_reg(<vscale x 2 x i1> %pg, i8* %a, i64 %off
 
 define <vscale x 8 x i16> @ldff1h(<vscale x 8 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ldff1h:
-; CHECK: ldff1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, i16* %a)
   ret <vscale x 8 x i16> %load
 }
 
 define <vscale x 8 x i16> @ldff1h_reg(<vscale x 8 x i1> %pg, i16* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1h_reg:
-; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %offset
   %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
   ret <vscale x 8 x i16> %load
@@ -162,8 +179,9 @@ define <vscale x 8 x i16> @ldff1h_reg(<vscale x 8 x i1> %pg, i16* %a, i64 %offse
 
 define <vscale x 4 x i32> @ldff1h_s(<vscale x 4 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ldff1h_s:
-; CHECK: ldff1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -171,8 +189,9 @@ define <vscale x 4 x i32> @ldff1h_s(<vscale x 4 x i1> %pg, i16* %a) {
 
 define <vscale x 4 x i32> @ldff1h_s_reg(<vscale x 4 x i1> %pg, i16* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1h_s_reg:
-; CHECK: ldff1h { z0.s }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %offset
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
@@ -181,8 +200,9 @@ define <vscale x 4 x i32> @ldff1h_s_reg(<vscale x 4 x i1> %pg, i16* %a, i64 %off
 
 define <vscale x 2 x i64> @ldff1h_d(<vscale x 2 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ldff1h_d:
-; CHECK: ldff1h { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -190,8 +210,9 @@ define <vscale x 2 x i64> @ldff1h_d(<vscale x 2 x i1> %pg, i16* %a) {
 
 define <vscale x 2 x i64> @ldff1h_d_reg(<vscale x 2 x i1> %pg, i16* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1h_d_reg:
-; CHECK: ldff1h { z0.d }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %offset
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
@@ -200,24 +221,27 @@ define <vscale x 2 x i64> @ldff1h_d_reg(<vscale x 2 x i1> %pg, i16* %a, i64 %off
 
 define <vscale x 8 x half> @ldff1h_f16(<vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: ldff1h_f16:
-; CHECK: ldff1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, half* %a)
   ret <vscale x 8 x half> %load
 }
 
 define <vscale x 8 x bfloat> @ldff1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ldff1h_bf16:
-; CHECK: ldff1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %a)
   ret <vscale x 8 x bfloat> %load
 }
 
 define <vscale x 8 x half> @ldff1h_f16_reg(<vscale x 8 x i1> %pg, half* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1h_f16_reg:
-; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr half, half* %a, i64 %offset
   %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
   ret <vscale x 8 x half> %load
@@ -225,8 +249,9 @@ define <vscale x 8 x half> @ldff1h_f16_reg(<vscale x 8 x i1> %pg, half* %a, i64
 
 define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, bfloat* %a, i64 %offset) #0 {
 ; CHECK-LABEL: ldff1h_bf16_reg:
-; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr bfloat, bfloat* %a, i64 %offset
   %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
   ret <vscale x 8 x bfloat> %load
@@ -238,8 +263,9 @@ define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, bfloat* %a,
 
 define <vscale x 4 x i32> @ldff1sh_s(<vscale x 4 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ldff1sh_s:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
   %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
@@ -247,8 +273,9 @@ define <vscale x 4 x i32> @ldff1sh_s(<vscale x 4 x i1> %pg, i16* %a) {
 
 define <vscale x 4 x i32> @ldff1sh_s_reg(<vscale x 4 x i1> %pg, i16* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sh_s_reg:
-; CHECK: ldff1sh { z0.s }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %offset
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base)
   %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
@@ -257,8 +284,9 @@ define <vscale x 4 x i32> @ldff1sh_s_reg(<vscale x 4 x i1> %pg, i16* %a, i64 %of
 
 define <vscale x 2 x i64> @ldff1sh_d(<vscale x 2 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: ldff1sh_d:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
   %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -266,8 +294,9 @@ define <vscale x 2 x i64> @ldff1sh_d(<vscale x 2 x i1> %pg, i16* %a) {
 
 define <vscale x 2 x i64> @ldff1sh_d_reg(<vscale x 2 x i1> %pg, i16* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sh_d_reg:
-; CHECK: ldff1sh { z0.d }, p0/z, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %offset
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base)
   %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
@@ -280,16 +309,18 @@ define <vscale x 2 x i64> @ldff1sh_d_reg(<vscale x 2 x i1> %pg, i16* %a, i64 %of
 
 define <vscale x 4 x i32> @ldff1w(<vscale x 4 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ldff1w:
-; CHECK: ldff1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, i32* %a)
   ret <vscale x 4 x i32> %load
 }
 
 define <vscale x 4 x i32> @ldff1w_reg(<vscale x 4 x i1> %pg, i32* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1w_reg:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %offset
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
   ret <vscale x 4 x i32> %load
@@ -297,8 +328,9 @@ define <vscale x 4 x i32> @ldff1w_reg(<vscale x 4 x i1> %pg, i32* %a, i64 %offse
 
 define <vscale x 2 x i64> @ldff1w_d(<vscale x 2 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ldff1w_d:
-; CHECK: ldff1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
   %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -306,8 +338,9 @@ define <vscale x 2 x i64> @ldff1w_d(<vscale x 2 x i1> %pg, i32* %a) {
 
 define <vscale x 2 x i64> @ldff1w_d_reg(<vscale x 2 x i1> %pg, i32* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1w_d_reg:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %offset
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base)
   %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
@@ -316,16 +349,18 @@ define <vscale x 2 x i64> @ldff1w_d_reg(<vscale x 2 x i1> %pg, i32* %a, i64 %off
 
 define <vscale x 4 x float> @ldff1w_f32(<vscale x 4 x i1> %pg, float* %a) {
 ; CHECK-LABEL: ldff1w_f32:
-; CHECK: ldff1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, float* %a)
   ret <vscale x 4 x float> %load
 }
 
 define <vscale x 4 x float> @ldff1w_f32_reg(<vscale x 4 x i1> %pg, float* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1w_f32_reg:
-; CHECK: ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr float, float* %a, i64 %offset
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
   ret <vscale x 4 x float> %load
@@ -333,16 +368,18 @@ define <vscale x 4 x float> @ldff1w_f32_reg(<vscale x 4 x i1> %pg, float* %a, i6
 
 define <vscale x 2 x float> @ldff1w_2f32(<vscale x 2 x i1> %pg, float* %a) {
 ; CHECK-LABEL: ldff1w_2f32:
-; CHECK: ldff1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, float* %a)
   ret <vscale x 2 x float> %load
 }
 
 define <vscale x 2 x float> @ldff1w_2f32_reg(<vscale x 2 x i1> %pg, float* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1w_2f32_reg:
-; CHECK: ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr float, float* %a, i64 %offset
   %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, float* %base)
   ret <vscale x 2 x float> %load
@@ -354,8 +391,9 @@ define <vscale x 2 x float> @ldff1w_2f32_reg(<vscale x 2 x i1> %pg, float* %a, i
 
 define <vscale x 2 x i64> @ldff1sw_d(<vscale x 2 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: ldff1sw_d:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
   %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
@@ -363,8 +401,9 @@ define <vscale x 2 x i64> @ldff1sw_d(<vscale x 2 x i1> %pg, i32* %a) {
 
 define <vscale x 2 x i64> @ldff1sw_d_reg(<vscale x 2 x i1> %pg, i32* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1sw_d_reg:
-; CHECK: ldff1sw { z0.d }, p0/z, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %offset
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base)
   %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
@@ -377,16 +416,18 @@ define <vscale x 2 x i64> @ldff1sw_d_reg(<vscale x 2 x i1> %pg, i32* %a, i64 %of
 
 define <vscale x 2 x i64> @ldff1d(<vscale x 2 x i1> %pg, i64* %a) {
 ; CHECK-LABEL: ldff1d:
-; CHECK: ldff1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, i64* %a)
   ret <vscale x 2 x i64> %load
 }
 
 define <vscale x 2 x i64> @ldff1d_reg(<vscale x 2 x i1> %pg, i64* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1d_reg:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 %offset
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
   ret <vscale x 2 x i64> %load
@@ -395,16 +436,18 @@ define <vscale x 2 x i64> @ldff1d_reg(<vscale x 2 x i1> %pg, i64* %a, i64 %offse
 
 define <vscale x 2 x double> @ldff1d_f64(<vscale x 2 x i1> %pg, double* %a) {
 ; CHECK-LABEL: ldff1d_f64:
-; CHECK: ldff1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, double* %a)
   ret <vscale x 2 x double> %load
 }
 
 define <vscale x 2 x double> @ldff1d_f64_reg(<vscale x 2 x i1> %pg, double* %a, i64 %offset) {
 ; CHECK-LABEL: ldff1d_f64_reg:
-; CHECK: ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr double, double* %a, i64 %offset
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
   ret <vscale x 2 x double> %load

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
index e1e90ef4c10ba..ea5ad352c28b7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
 
 ;
 ; LD1RQB
@@ -6,16 +7,18 @@
 
 define <vscale x 16 x i8> @ld1rqb_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8:
-; CHECK: ld1rqb { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 16 x i8> @ld1rqb_i8_imm(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8_imm:
-; CHECK: ld1rqb { z0.b }, p0/z, [x0, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #16]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %addr, i8 16
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
   ret <vscale x 16 x i8> %res
@@ -23,8 +26,9 @@ define <vscale x 16 x i8> @ld1rqb_i8_imm(<vscale x 16 x i1> %pred, i8* %addr) {
 
 define <vscale x 16 x i8> @ld1rqb_i8_imm_lower_bound(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8_imm_lower_bound:
-; CHECK: ld1rqb { z0.b }, p0/z, [x0, #-128]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #-128]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %addr, i8 -128
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
   ret <vscale x 16 x i8> %res
@@ -32,8 +36,9 @@ define <vscale x 16 x i8> @ld1rqb_i8_imm_lower_bound(<vscale x 16 x i1> %pred, i
 
 define <vscale x 16 x i8> @ld1rqb_i8_imm_upper_bound(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8_imm_upper_bound:
-; CHECK: ld1rqb { z0.b }, p0/z, [x0, #112]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x0, #112]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %addr, i8 112
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
   ret <vscale x 16 x i8> %res
@@ -41,9 +46,10 @@ define <vscale x 16 x i8> @ld1rqb_i8_imm_upper_bound(<vscale x 16 x i1> %pred, i
 
 define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_lower_bound(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8_imm_out_of_lower_bound:
-; CHECK: sub x8, x0, #129
-; CHECK-NEXT: ld1rqb { z0.b }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, #129
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x8]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %addr, i64 -129
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
   ret <vscale x 16 x i8> %res
@@ -51,9 +57,10 @@ define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_lower_bound(<vscale x 16 x i1> %
 
 define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_upper_bound(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld1rqb_i8_imm_out_of_upper_bound:
-; CHECK: add x8, x0, #113
-; CHECK-NEXT: ld1rqb { z0.b }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #113
+; CHECK-NEXT:    ld1rqb { z0.b }, p0/z, [x8]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i8, i8* %addr, i64 113
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> %pred, i8* %ptr)
   ret <vscale x 16 x i8> %res
@@ -65,24 +72,27 @@ define <vscale x 16 x i8> @ld1rqb_i8_imm_out_of_upper_bound(<vscale x 16 x i1> %
 
 define <vscale x 8 x i16> @ld1rqh_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1rqh_i16:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 8 x half> @ld1rqh_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld1rqh_f16:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 8 x half> %res
 }
 
 define <vscale x 8 x i16> @ld1rqh_i16_imm(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld1rqh_i16_imm:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-64]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-64]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i16, i16* %addr, i16 -32
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> %pred, i16* %ptr)
   ret <vscale x 8 x i16> %res
@@ -90,8 +100,9 @@ define <vscale x 8 x i16> @ld1rqh_i16_imm(<vscale x 8 x i1> %pred, i16* %addr) {
 
 define <vscale x 8 x half> @ld1rqh_f16_imm(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld1rqh_f16_imm:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds half, half* %addr, i16 -8
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> %pred, half* %ptr)
   ret <vscale x 8 x half> %res
@@ -99,16 +110,18 @@ define <vscale x 8 x half> @ld1rqh_f16_imm(<vscale x 8 x i1> %pred, half* %addr)
 
 define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ld1rqh_bf16:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
   ret <vscale x 8 x bfloat> %res
 }
 
 define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ld1rqh_bf16_imm:
-; CHECK: ld1rqh { z0.h }, p0/z, [x0, #-16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0, #-16]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat* %ptr)
   ret <vscale x 8 x bfloat> %res
@@ -120,24 +133,27 @@ define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %
 
 define <vscale x 4 x i32> @ld1rqw_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld1rqw_i32:
-; CHECK: ld1rqw { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 4 x float> @ld1rqw_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld1rqw_f32:
-; CHECK: ld1rqw { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 4 x float> %res
 }
 
 define <vscale x 4 x i32> @ld1rqw_i32_imm(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld1rqw_i32_imm:
-; CHECK: ld1rqw { z0.s }, p0/z, [x0, #112]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #112]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i32, i32* %addr, i32 28
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> %pred, i32* %ptr)
   ret <vscale x 4 x i32> %res
@@ -145,8 +161,9 @@ define <vscale x 4 x i32> @ld1rqw_i32_imm(<vscale x 4 x i1> %pred, i32* %addr) {
 
 define <vscale x 4 x float> @ld1rqw_f32_imm(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld1rqw_f32_imm:
-; CHECK: ld1rqw { z0.s }, p0/z, [x0, #32]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0, #32]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds float, float* %addr, i32 8
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> %pred, float* %ptr)
   ret <vscale x 4 x float> %res
@@ -158,24 +175,27 @@ define <vscale x 4 x float> @ld1rqw_f32_imm(<vscale x 4 x i1> %pred, float* %add
 
 define <vscale x 2 x i64> @ld1rqd_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld1rqd_i64:
-; CHECK: ld1rqd { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, i64* %addr)
   ret <vscale x 2 x i64> %res
 }
 
 define <vscale x 2 x double> @ld1rqd_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld1rqd_f64:
-; CHECK: ld1rqd { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, double* %addr)
   ret <vscale x 2 x double> %res
 }
 
 define <vscale x 2 x i64> @ld1rqd_i64_imm(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld1rqd_i64_imm:
-; CHECK: ld1rqd { z0.d }, p0/z, [x0, #64]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #64]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds i64, i64* %addr, i64 8
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> %pred, i64* %ptr)
   ret <vscale x 2 x i64> %res
@@ -183,8 +203,9 @@ define <vscale x 2 x i64> @ld1rqd_i64_imm(<vscale x 2 x i1> %pred, i64* %addr) {
 
 define <vscale x 2 x double> @ld1rqd_f64_imm(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld1rqd_f64_imm:
-; CHECK: ld1rqd { z0.d }, p0/z, [x0, #-128]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0, #-128]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds double, double* %addr, i64 -16
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> %pred, double* %ptr)
   ret <vscale x 2 x double> %res
@@ -196,8 +217,9 @@ define <vscale x 2 x double> @ld1rqd_f64_imm(<vscale x 2 x i1> %pred, double* %a
 
 define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ldnt1b_i8:
-; CHECK: ldnt1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
                                                                  i8* %addr)
   ret <vscale x 16 x i8> %res
@@ -209,8 +231,9 @@ define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 
 define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ldnt1h_i16:
-; CHECK: ldnt1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
                                                                  i16* %addr)
   ret <vscale x 8 x i16> %res
@@ -218,8 +241,9 @@ define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 
 define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ldnt1h_f16:
-; CHECK: ldnt1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
                                                                   half* %addr)
   ret <vscale x 8 x half> %res
@@ -227,8 +251,9 @@ define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 
 define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ldnt1h_bf16:
-; CHECK: ldnt1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred,
                                                                      bfloat* %addr)
   ret <vscale x 8 x bfloat> %res
@@ -240,8 +265,9 @@ define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr
 
 define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ldnt1w_i32:
-; CHECK: ldnt1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
                                                                  i32* %addr)
   ret <vscale x 4 x i32> %res
@@ -249,8 +275,9 @@ define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 
 define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ldnt1w_f32:
-; CHECK: ldnt1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
                                                                    float* %addr)
   ret <vscale x 4 x float> %res
@@ -262,8 +289,9 @@ define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 
 define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ldnt1d_i64:
-; CHECK: ldnt1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
                                                                  i64* %addr)
   ret <vscale x 2 x i64> %res
@@ -271,8 +299,9 @@ define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 
 define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ldnt1d_f64:
-; CHECK: ldnt1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
                                                                     double* %addr)
   ret <vscale x 2 x double> %res
@@ -284,8 +313,9 @@ define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, double* %addr)
 
 define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld2b_i8:
-; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 32 x i8> %res
 }
@@ -296,24 +326,27 @@ define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 
 define <vscale x 16 x i16> @ld2h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld2h_i16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 16 x i16> %res
 }
 
 define <vscale x 16 x half> @ld2h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld2h_f16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 16 x half> %res
 }
 
 define <vscale x 16 x bfloat> @ld2h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ld2h_bf16:
-; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2h { z0.h, z1.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
   ret <vscale x 16 x bfloat> %res
 }
@@ -324,16 +357,18 @@ define <vscale x 16 x bfloat> @ld2h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
 
 define <vscale x 8 x i32> @ld2w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld2w_i32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 8 x i32> %res
 }
 
 define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld2w_f32:
-; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 8 x float> %res
 }
@@ -344,16 +379,18 @@ define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 
 define <vscale x 4 x i64> @ld2d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld2d_i64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
   ret <vscale x 4 x i64> %res
 }
 
 define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld2d_f64:
-; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
   ret <vscale x 4 x double> %res
 }
@@ -364,8 +401,9 @@ define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 
 define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld3b_i8:
-; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 48 x i8> %res
 }
@@ -376,24 +414,27 @@ define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 
 define <vscale x 24 x i16> @ld3h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld3h_i16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 24 x i16> %res
 }
 
 define <vscale x 24 x half> @ld3h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld3h_f16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 24 x half> %res
 }
 
 define <vscale x 24 x bfloat> @ld3h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ld3h_bf16:
-; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
   ret <vscale x 24 x bfloat> %res
 }
@@ -404,16 +445,18 @@ define <vscale x 24 x bfloat> @ld3h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
 
 define <vscale x 12 x i32> @ld3w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld3w_i32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 12 x i32> %res
 }
 
 define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld3w_f32:
-; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 12 x float> %res
 }
@@ -424,16 +467,18 @@ define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 
 define <vscale x 6 x i64> @ld3d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld3d_i64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
   ret <vscale x 6 x i64> %res
 }
 
 define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld3d_f64:
-; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
   ret <vscale x 6 x double> %res
 }
@@ -444,8 +489,9 @@ define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 
 define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ld4b_i8:
-; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
   ret <vscale x 64 x i8> %res
 }
@@ -456,24 +502,27 @@ define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 
 define <vscale x 32 x i16> @ld4h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ld4h_i16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
   ret <vscale x 32 x i16> %res
 }
 
 define <vscale x 32 x half> @ld4h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ld4h_f16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
   ret <vscale x 32 x half> %res
 }
 
 define <vscale x 32 x bfloat> @ld4h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
 ; CHECK-LABEL: ld4h_bf16:
-; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
   ret <vscale x 32 x bfloat> %res
 }
@@ -484,16 +533,18 @@ define <vscale x 32 x bfloat> @ld4h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr)
 
 define <vscale x 16 x i32> @ld4w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ld4w_i32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
   ret <vscale x 16 x i32> %res
 }
 
 define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ld4w_f32:
-; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
   ret <vscale x 16 x float> %res
 }
@@ -504,16 +555,18 @@ define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 
 define <vscale x 8 x i64> @ld4d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ld4d_i64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
   ret <vscale x 8 x i64> %res
 }
 
 define <vscale x 8 x double> @ld4d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ld4d_f64:
-; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
   ret <vscale x 8 x double> %res
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
index ab2d9584a4c11..033b4b61ec318 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
 ; AND
 ;
 
-define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) #0 {
+define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: and_i8:
-; CHECK: and z0.b, z0.b, #0x7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.b, z0.b, #0x7
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 7, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
@@ -16,10 +18,11 @@ define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) #0 {
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) #0 {
+define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: and_i16:
-; CHECK: and z0.h, z0.h, #0xf0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0xf0
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 240, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
@@ -28,10 +31,11 @@ define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) #0 {
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: and_i32:
-; CHECK: and z0.s, z0.s, #0xffff00
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff00
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
@@ -40,10 +44,11 @@ define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) #0 {
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
+define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: and_i64:
-; CHECK: and z0.d, z0.d, #0xfffc000000000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xfffc000000000000
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
@@ -56,10 +61,11 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
 ; BIC
 ;
 
-define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: bic_i8:
-; CHECK: and z0.b, z0.b, #0x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.b, z0.b, #0x1
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
@@ -68,10 +74,11 @@ define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: bic_i16:
-; CHECK: and z0.h, z0.h, #0x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0x1
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
@@ -80,10 +87,11 @@ define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: bic_i32:
-; CHECK: and z0.s, z0.s, #0xff0000ff
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xff0000ff
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
@@ -92,10 +100,11 @@ define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: bic_i64:
-; CHECK: and z0.d, z0.d, #0x3ffffffffffff
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0x3ffffffffffff
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
@@ -108,10 +117,11 @@ define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
 ; EOR
 ;
 
-define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) #0 {
+define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: eor_i8:
-; CHECK: eor z0.b, z0.b, #0xf
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.b, z0.b, #0xf
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 15, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
@@ -120,10 +130,11 @@ define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) #0 {
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) #0 {
+define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: eor_i16:
-; CHECK: eor z0.h, z0.h, #0xfc07
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.h, z0.h, #0xfc07
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 64519, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
@@ -132,10 +143,11 @@ define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) #0 {
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: eor_i32:
-; CHECK: eor z0.s, z0.s, #0xffff00
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.s, z0.s, #0xffff00
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
@@ -144,10 +156,11 @@ define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) #0 {
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) #0 {
+define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: eor_i64:
-; CHECK: eor z0.d, z0.d, #0x1000000000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.d, z0.d, #0x1000000000000
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 281474976710656, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
@@ -160,10 +173,11 @@ define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) #0 {
 ; ORR
 ;
 
-define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) #0 {
+define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: orr_i8:
-; CHECK: orr z0.b, z0.b, #0x6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.b, z0.b, #0x6
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 6, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
@@ -172,10 +186,11 @@ define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) #0 {
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) #0 {
+define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: orr_i16:
-; CHECK: orr z0.h, z0.h, #0x8001
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.h, z0.h, #0x8001
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 32769, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
@@ -184,10 +199,11 @@ define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) #0 {
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: orr_i32:
-; CHECK: orr z0.s, z0.s, #0xffff
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 65535, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
@@ -196,10 +212,11 @@ define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) #0 {
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) #0 {
+define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: orr_i64:
-; CHECK: orr z0.d, z0.d, #0x7ffc000000000000
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, #0x7ffc000000000000
+; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9222246136947933184, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
@@ -209,10 +226,11 @@ define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) #0 {
 }
 
 ; As orr_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: orr_i32_ptrue_all_b:
-; CHECK: orr z0.s, z0.s, #0xffff
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
@@ -223,10 +241,11 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
 }
 
 ; As orr_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: orr_i32_ptrue_all_h:
-; CHECK: orr z0.s, z0.s, #0xffff
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -239,13 +258,14 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
 
 ; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: orr_i32_ptrue_all_d:
-; CHECK-DAG: mov [[IMM:w[0-9]+]], #65535
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, [[IMM]]
-; CHECK-DAG: orr z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -290,5 +310,3 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-
-attributes #0 = { "target-features"="+sve" }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
index 59b546f62328d..a2def791cdc9c 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @cnot_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cnot_i8:
-; CHECK: cnot z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnot z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @cnot_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg
 
 define <vscale x 8 x i16> @cnot_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cnot_i16:
-; CHECK: cnot z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnot z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @cnot_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @cnot_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cnot_i32:
-; CHECK: cnot z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnot z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @cnot_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @cnot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cnot_i64:
-; CHECK: cnot z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnot z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @cnot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 16 x i8> @not_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: not_i8:
-; CHECK: not z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    not z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %b)
@@ -61,8 +67,9 @@ define <vscale x 16 x i8> @not_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg,
 
 define <vscale x 8 x i16> @not_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: not_i16:
-; CHECK: not z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    not z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %b)
@@ -71,8 +78,9 @@ define <vscale x 8 x i16> @not_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 4 x i32> @not_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: not_i32:
-; CHECK: not z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    not z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %b)
@@ -81,8 +89,9 @@ define <vscale x 4 x i32> @not_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @not_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: not_i64:
-; CHECK: not z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    not z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll
index add3622ebf7e8..bbb9248932a77 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp32.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f32mm -asm-verbose=0 < %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f32mm < %s -o - | FileCheck %s
 
 define <vscale x 4 x float> @fmmla_s(<vscale x 4 x float> %r, <vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
-entry:
 ; CHECK-LABEL: fmmla_s:
-; CHECK-NEXT:  fmmla   z0.s, z1.s, z2.s
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmmla z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32(<vscale x 4 x float> %r, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %val
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll
index 80dd7a0177474..138efbe0daafd 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-fp64.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm < %s -o - | FileCheck %s
 
 define <vscale x 2 x double> @fmmla_d(<vscale x 2 x double> %r, <vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
-entry:
 ; CHECK-LABEL: fmmla_d:
-; CHECK-NEXT:  fmmla   z0.d, z1.d, z2.d
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmmla z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmmla.nxv2f64(<vscale x 2 x double> %r, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %val
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
index 2d672c86cb6ac..0cb9833a05282 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
@@ -1,109 +1,122 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+i8mm -asm-verbose=0 < %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+i8mm < %s -o - | FileCheck %s
 
 define <vscale x 4 x i32> @smmla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: smmla:
-; CHECK-NEXT:  smmla   z0.s, z1.b, z2.b
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smmla z0.s, z1.b, z2.b
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @ummla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: ummla:
-; CHECK-NEXT:  ummla   z0.s, z1.b, z2.b
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ummla z0.s, z1.b, z2.b
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usmmla(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usmmla:
-; CHECK-NEXT:  usmmla   z0.s, z1.b, z2.b
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usmmla z0.s, z1.b, z2.b
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usdot:
-; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot z0.s, z1.b, z2.b
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usdot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usdot_lane_0:
-; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usdot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usdot_lane_1:
-; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usdot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usdot_lane_2:
-; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @usdot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: usdot_lane_3:
-; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @sudot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: sudot_lane_0:
-; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[0]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @sudot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: sudot_lane_1:
-; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[1]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @sudot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: sudot_lane_2:
-; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[2]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 4 x i32> @sudot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
-entry:
 ; CHECK-LABEL: sudot_lane_3:
-; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[3]
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT:    ret
+entry:
   %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
   ret <vscale x 4 x i32> %val
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll
index f4a46c20a8777..b47c178e7ebab 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s
 
 ;
 ; TRN1Q
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: trn1_i8:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -15,8 +17,9 @@ define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: trn1_i16:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -24,8 +27,9 @@ define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: trn1_i32:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -33,8 +37,9 @@ define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: trn1_i64:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -42,8 +47,9 @@ define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: trn1_f16:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -51,8 +57,9 @@ define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: trn1_bf16:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -60,8 +67,9 @@ define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: trn1_f32:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -69,8 +77,9 @@ define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: trn1_f64:
-; CHECK-NEXT:  trn1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -82,8 +91,9 @@ define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: trn2_i8:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -91,8 +101,9 @@ define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: trn2_i16:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -100,8 +111,9 @@ define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: trn2_i32:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -109,8 +121,9 @@ define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: trn2_i64:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -118,8 +131,9 @@ define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: trn2_f16:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -127,8 +141,9 @@ define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: trn2_bf16:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -136,8 +151,9 @@ define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: trn2_f32:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -145,8 +161,9 @@ define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: trn2_f64:
-; CHECK-NEXT:  trn2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -158,8 +175,9 @@ define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: uzp1_i8:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -167,8 +185,9 @@ define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: uzp1_i16:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -176,8 +195,9 @@ define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: uzp1_i32:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -185,8 +205,9 @@ define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: uzp1_i64:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -194,8 +215,9 @@ define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: uzp1_f16:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -203,8 +225,9 @@ define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: uzp1_bf16:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -212,8 +235,9 @@ define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: uzp1_f32:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -221,8 +245,9 @@ define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: uzp1_f64:
-; CHECK-NEXT:  uzp1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -234,8 +259,9 @@ define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: uzp2_i8:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -243,8 +269,9 @@ define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: uzp2_i16:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -252,8 +279,9 @@ define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: uzp2_i32:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -261,8 +289,9 @@ define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: uzp2_i64:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -270,8 +299,9 @@ define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: uzp2_f16:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -279,8 +309,9 @@ define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: uzp2_bf16:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -288,8 +319,9 @@ define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: uzp2_f32:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -297,8 +329,9 @@ define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: uzp2_f64:
-; CHECK-NEXT:  uzp2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -310,8 +343,9 @@ define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: zip1_i8:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -319,8 +353,9 @@ define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: zip1_i16:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -328,8 +363,9 @@ define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: zip1_i32:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -337,8 +373,9 @@ define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: zip1_i64:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -346,8 +383,9 @@ define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: zip1_f16:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -355,8 +393,9 @@ define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: zip1_bf16:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -364,8 +403,9 @@ define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: zip1_f32:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -373,8 +413,9 @@ define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: zip1_f64:
-; CHECK-NEXT:  zip1 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -386,8 +427,9 @@ define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
 ; CHECK-LABEL: zip2_i8:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -395,8 +437,9 @@ define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
 ; CHECK-LABEL: zip2_i16:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -404,8 +447,9 @@ define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
 ; CHECK-LABEL: zip2_i32:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -413,8 +457,9 @@ define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
 ; CHECK-LABEL: zip2_i64:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -422,8 +467,9 @@ define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
 ; CHECK-LABEL: zip2_f16:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2q.nxv8f16(<vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -431,8 +477,9 @@ define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
 ; CHECK-LABEL: zip2_bf16:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -440,8 +487,9 @@ define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: zip2_f32:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2q.nxv4f32(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -449,8 +497,9 @@ define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
 ; CHECK-LABEL: zip2_f64:
-; CHECK-NEXT:  zip2 z0.q, z0.q, z1.q
-; CHECK-NEXT:  ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2q.nxv2f64(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -512,4 +561,4 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16>, <
 declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 
 ; +bf16 is required for the bfloat version.
-attributes #0 = { "target-features"="+sve,+fp64mm,+bf16" }
+attributes #0 = { "target-features"="+sve,+f64mm,+bf16" }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index 680e3ab589ae0..df2278dc501c8 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clasta_i8:
-; CHECK: clasta z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -16,8 +18,9 @@ define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clasta_i16:
-; CHECK: clasta z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -26,8 +29,9 @@ define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clasta_i32:
-; CHECK: clasta z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -36,8 +40,9 @@ define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clasta_i64:
-; CHECK: clasta z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -46,8 +51,9 @@ define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: clasta_f16:
-; CHECK: clasta z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x half> %b)
@@ -56,8 +62,9 @@ define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
 
 define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: clasta_bf16:
-; CHECK: clasta z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                       <vscale x 8 x bfloat> %a,
                                                                       <vscale x 8 x bfloat> %b)
@@ -66,8 +73,9 @@ define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
 
 define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_f32:
-; CHECK: clasta z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b)
@@ -76,8 +84,9 @@ define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
 
 define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: clasta_f64:
-; CHECK: clasta z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x double> %b)
@@ -90,8 +99,9 @@ define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
 
 define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clasta_n_i8:
-; CHECK: clasta w0, p0, w0, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta w0, p0, w0, z0.b
+; CHECK-NEXT:    ret
   %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                     i8 %a,
                                                     <vscale x 16 x i8> %b)
@@ -100,8 +110,9 @@ define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
 
 define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clasta_n_i16:
-; CHECK: clasta w0, p0, w0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta w0, p0, w0, z0.h
+; CHECK-NEXT:    ret
   %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                      i16 %a,
                                                      <vscale x 8 x i16> %b)
@@ -110,8 +121,9 @@ define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
 
 define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clasta_n_i32:
-; CHECK: clasta w0, p0, w0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta w0, p0, w0, z0.s
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                      i32 %a,
                                                      <vscale x 4 x i32> %b)
@@ -120,8 +132,9 @@ define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
 
 define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clasta_n_i64:
-; CHECK: clasta x0, p0, x0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta x0, p0, x0, z0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                      i64 %a,
                                                      <vscale x 2 x i64> %b)
@@ -130,8 +143,9 @@ define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
 
 define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: clasta_n_f16:
-; CHECK: clasta h0, p0, h0, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta h0, p0, h0, z1.h
+; CHECK-NEXT:    ret
   %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                       half %a,
                                                       <vscale x 8 x half> %b)
@@ -140,8 +154,9 @@ define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
 
 define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: clasta_n_bf16:
-; CHECK: clasta h0, p0, h0, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta h0, p0, h0, z1.h
+; CHECK-NEXT:    ret
   %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                          bfloat %a,
                                                          <vscale x 8 x bfloat> %b)
@@ -150,8 +165,9 @@ define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfl
 
 define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_n_f32:
-; CHECK: clasta s0, p0, s0, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta s0, p0, s0, z1.s
+; CHECK-NEXT:    ret
   %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                        float %a,
                                                        <vscale x 4 x float> %b)
@@ -160,8 +176,9 @@ define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float>
 
 define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: clasta_n_f64:
-; CHECK: clasta d0, p0, d0, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clasta d0, p0, d0, z1.d
+; CHECK-NEXT:    ret
   %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                         double %a,
                                                         <vscale x 2 x double> %b)
@@ -174,8 +191,9 @@ define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x doub
 
 define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clastb_i8:
-; CHECK: clastb z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -184,8 +202,9 @@ define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clastb_i16:
-; CHECK: clastb z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -194,8 +213,9 @@ define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clastb_i32:
-; CHECK: clastb z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -204,8 +224,9 @@ define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clastb_i64:
-; CHECK: clastb z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -214,8 +235,9 @@ define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: clastb_f16:
-; CHECK: clastb z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x half> %b)
@@ -224,8 +246,9 @@ define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
 
 define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: clastb_bf16:
-; CHECK: clastb z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                       <vscale x 8 x bfloat> %a,
                                                                       <vscale x 8 x bfloat> %b)
@@ -234,8 +257,9 @@ define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
 
 define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_f32:
-; CHECK: clastb z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b)
@@ -244,8 +268,9 @@ define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
 
 define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: clastb_f64:
-; CHECK: clastb z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x double> %b)
@@ -258,8 +283,9 @@ define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
 
 define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clastb_n_i8:
-; CHECK: clastb w0, p0, w0, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb w0, p0, w0, z0.b
+; CHECK-NEXT:    ret
   %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                     i8 %a,
                                                     <vscale x 16 x i8> %b)
@@ -268,8 +294,9 @@ define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
 
 define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clastb_n_i16:
-; CHECK: clastb w0, p0, w0, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb w0, p0, w0, z0.h
+; CHECK-NEXT:    ret
   %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                      i16 %a,
                                                      <vscale x 8 x i16> %b)
@@ -278,8 +305,9 @@ define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
 
 define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clastb_n_i32:
-; CHECK: clastb w0, p0, w0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb w0, p0, w0, z0.s
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                      i32 %a,
                                                      <vscale x 4 x i32> %b)
@@ -288,8 +316,9 @@ define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
 
 define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clastb_n_i64:
-; CHECK: clastb x0, p0, x0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb x0, p0, x0, z0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                      i64 %a,
                                                      <vscale x 2 x i64> %b)
@@ -298,8 +327,9 @@ define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
 
 define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: clastb_n_f16:
-; CHECK: clastb h0, p0, h0, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb h0, p0, h0, z1.h
+; CHECK-NEXT:    ret
   %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                       half %a,
                                                       <vscale x 8 x half> %b)
@@ -308,8 +338,9 @@ define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
 
 define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: clastb_n_bf16:
-; CHECK: clastb h0, p0, h0, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb h0, p0, h0, z1.h
+; CHECK-NEXT:    ret
   %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                          bfloat %a,
                                                          <vscale x 8 x bfloat> %b)
@@ -318,8 +349,9 @@ define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfl
 
 define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_n_f32:
-; CHECK: clastb s0, p0, s0, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb s0, p0, s0, z1.s
+; CHECK-NEXT:    ret
   %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                        float %a,
                                                        <vscale x 4 x float> %b)
@@ -328,8 +360,9 @@ define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float>
 
 define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: clastb_n_f64:
-; CHECK: clastb d0, p0, d0, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clastb d0, p0, d0, z1.d
+; CHECK-NEXT:    ret
   %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                         double %a,
                                                         <vscale x 2 x double> %b)
@@ -342,64 +375,72 @@ define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x doub
 
 define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: dupq_i8:
-; CHECK: mov z0.q, q0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: dupq_i16:
-; CHECK: mov z0.q, z0.q[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, z0.q[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: dupq_i32:
-; CHECK: mov z0.q, z0.q[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, z0.q[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: dupq_i64:
-; CHECK: mov z0.q, z0.q[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, z0.q[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
   ret <vscale x 2 x i64> %out
 }
 
 define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: dupq_f16:
-; CHECK: mov z0.q, q0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: dupq_bf16:
-; CHECK: mov z0.q, q0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: dupq_f32:
-; CHECK: mov z0.q, z0.q[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, z0.q[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
 ; CHECK-LABEL: dupq_f64:
-; CHECK: mov z0.q, z0.q[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.q, z0.q[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
   ret <vscale x 2 x double> %out
 }
@@ -410,13 +451,14 @@ define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
 
 define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_i8:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
   ret <vscale x 16 x i8> %out
 }
@@ -424,13 +466,14 @@ define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_i16:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
   ret <vscale x 8 x i16> %out
 }
@@ -438,13 +481,14 @@ define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_i32:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
   ret <vscale x 4 x i32> %out
 }
@@ -452,13 +496,14 @@ define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_i64:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
   ret <vscale x 2 x i64> %out
 }
@@ -466,13 +511,14 @@ define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_f16:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
   ret <vscale x 8 x half> %out
 }
@@ -480,13 +526,14 @@ define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
 ; CHECK-LABEL: dupq_lane_bf16:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
   ret <vscale x 8 x bfloat> %out
 }
@@ -494,13 +541,14 @@ define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx)
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_f32:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
   ret <vscale x 4 x float> %out
 }
@@ -508,13 +556,14 @@ define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_f64:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
-; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
-; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
-; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    add z1.d, z1.d, z2.d
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
   ret <vscale x 2 x double> %out
 }
@@ -522,11 +571,12 @@ define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx)
 ; NOTE: Index out of range (0-3)
 define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: dupq_i64_range:
-; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
-; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
-; CHECK-DAG:  add   [[Z3:z[0-9]+]].d, [[Z2]].d, #8
-; CHECK: tbl z0.d, { z0.d }, [[Z3]].d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    and z1.d, z1.d, #0x1
+; CHECK-NEXT:    add z1.d, z1.d, #8 // =0x8
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
   ret <vscale x 2 x i64> %out
 }
@@ -633,8 +683,9 @@ define dso_local <vscale x 16 x i8> @dupq_ld1rqd_i8() {
 
 define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ext_i8:
-; CHECK: ext z0.b, z0.b, z1.b, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 255)
@@ -643,8 +694,9 @@ define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ext_i16:
-; CHECK: ext z0.b, z0.b, z1.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 0)
@@ -653,8 +705,9 @@ define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ext_i32:
-; CHECK: ext z0.b, z0.b, z1.b, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 1)
@@ -663,8 +716,9 @@ define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ext_i64:
-; CHECK: ext z0.b, z0.b, z1.b, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #16
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 2)
@@ -673,8 +727,9 @@ define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: ext_bf16:
-; CHECK: ext z0.b, z0.b, z1.b, #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #6
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b,
                                                                    i32 3)
@@ -683,8 +738,9 @@ define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x b
 
 define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: ext_f16:
-; CHECK: ext z0.b, z0.b, z1.b, #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #6
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b,
                                                                 i32 3)
@@ -693,8 +749,9 @@ define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: ext_f32:
-; CHECK: ext z0.b, z0.b, z1.b, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #16
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b,
                                                                  i32 4)
@@ -703,8 +760,9 @@ define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x floa
 
 define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: ext_f64:
-; CHECK: ext z0.b, z0.b, z1.b, #40
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #40
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b,
                                                                   i32 5)
@@ -716,81 +774,90 @@ define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x do
 ;
 
 define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: lasta_i8
-; CHECK: lasta w0, p0, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta w0, p0, z0.b
+; CHECK-NEXT:    ret
   %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %res
 }
 
 define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: lasta_i16
-; CHECK: lasta w0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta w0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %res
 }
 
 define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: lasta_i32
-; CHECK: lasta w0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta w0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %res
 }
 
 define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL:  lasta_i64
-; CHECK: lasta x0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta x0, p0, z0.d
+; CHECK-NEXT:    ret
   %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %res
 }
 
 define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
-; CHECK-LABEL: lasta_f16
-; CHECK: lasta h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta h0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
   ret half %res
 }
 
 define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
-; CHECK-LABEL: lasta_bf16
-; CHECK: lasta h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta h0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                       <vscale x 8 x bfloat> %a)
   ret bfloat %res
 }
 
 define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
-; CHECK-LABEL: lasta_f32
-; CHECK: lasta s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta s0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
   ret float %res
 }
 
 define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
-; CHECK-LABEL: lasta_f32_v2
-; CHECK: lasta s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_f32_v2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta s0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x float> %a)
   ret float %res
 }
 
 define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
-; CHECK-LABEL:  lasta_f64
-; CHECK: lasta d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: lasta_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lasta d0, p0, z0.d
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
   ret double %res
@@ -801,81 +868,90 @@ define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ;
 
 define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: lastb_i8
-; CHECK: lastb w0, p0, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb w0, p0, z0.b
+; CHECK-NEXT:    ret
   %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %res
 }
 
 define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
-; CHECK-LABEL: lastb_i16
-; CHECK: lastb w0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb w0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %res
 }
 
 define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
-; CHECK-LABEL: lastb_i32
-; CHECK: lastb w0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb w0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %res
 }
 
 define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL:  lastb_i64
-; CHECK: lastb x0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb x0, p0, z0.d
+; CHECK-NEXT:    ret
   %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %res
 }
 
 define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
-; CHECK-LABEL: lastb_f16
-; CHECK: lastb h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb h0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
   ret half %res
 }
 
 define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
-; CHECK-LABEL: lastb_bf16
-; CHECK: lastb h0, p0, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb h0, p0, z0.h
+; CHECK-NEXT:    ret
   %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                       <vscale x 8 x bfloat> %a)
   ret bfloat %res
 }
 
 define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
-; CHECK-LABEL: lastb_f32
-; CHECK: lastb s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb s0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
   ret float %res
 }
 
 define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
-; CHECK-LABEL: lastb_f32_v2
-; CHECK: lastb s0, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_f32_v2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb s0, p0, z0.s
+; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x float> %a)
   ret float %res
 }
 
 define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
-; CHECK-LABEL:  lastb_f64
-; CHECK: lastb d0, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: lastb_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lastb d0, p0, z0.d
+; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
   ret double %res
@@ -887,8 +963,9 @@ define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 
 define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: compact_i32:
-; CHECK: compact z0.s, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a)
   ret <vscale x 4 x i32> %out
@@ -896,8 +973,9 @@ define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: compact_i64:
-; CHECK: compact z0.d, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    compact z0.d, p0, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x i64> %a)
   ret <vscale x 2 x i64> %out
@@ -905,8 +983,9 @@ define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: compact_f32:
-; CHECK: compact z0.s, p0, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x float> %a)
   ret <vscale x 4 x float> %out
@@ -914,8 +993,9 @@ define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: compact_f64:
-; CHECK: compact z0.d, p0, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    compact z0.d, p0, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
                                                                       <vscale x 2 x double> %a)
   ret <vscale x 2 x double> %out
@@ -926,97 +1006,109 @@ define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
 ;
 
 define <vscale x 16 x i1> @rev_b8( <vscale x 16 x i1> %a) {
-; CHECK-LABEL: rev_b8
-; CHECK: rev p0.b, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_b8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
   ret <vscale x 16 x i1> %res
 }
 
 define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
-; CHECK-LABEL: rev_b16
-; CHECK: rev p0.h, p0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_b16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
   ret <vscale x 8 x i1> %res
 }
 
 define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: rev_b32
-; CHECK: rev p0.s, p0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_b32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
   ret <vscale x 4 x i1> %res
 }
 
 define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
-; CHECK-LABEL:  rev_b64
-; CHECK: rev p0.d, p0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_b64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
   ret <vscale x 2 x i1> %res
 }
 
 define <vscale x 16 x i8> @rev_i8( <vscale x 16 x i8> %a) {
-; CHECK-LABEL: rev_i8
-; CHECK: rev z0.b, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.b, z0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: rev_i16
-; CHECK: rev z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.h, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: rev_i32
-; CHECK: rev z0.s, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.s, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
-; CHECK-LABEL:  rev_i64
-; CHECK: rev z0.d, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.d, z0.d
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
   ret <vscale x 2 x i64> %res
 }
 
 define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
-; CHECK-LABEL: rev_bf16
-; CHECK: rev z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.h, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
   ret <vscale x 8 x bfloat> %res
 }
 
 define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
-; CHECK-LABEL: rev_f16
-; CHECK: rev z0.h, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.h, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
   ret <vscale x 8 x half> %res
 }
 
 define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
-; CHECK-LABEL: rev_f32
-; CHECK: rev z0.s, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.s, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
   ret <vscale x 4 x float> %res
 }
 
 define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
-; CHECK-LABEL:  rev_f64
-; CHECK: rev z0.d, z0.d
-; CHECK-NEXT: ret
+; CHECK-LABEL: rev_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.d, z0.d
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
   ret <vscale x 2 x double> %res
 }
@@ -1027,8 +1119,9 @@ define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
 
 define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: splice_i8:
-; CHECK: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1037,8 +1130,9 @@ define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: splice_i16:
-; CHECK: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1047,8 +1141,9 @@ define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: splice_i32:
-; CHECK: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1057,8 +1152,9 @@ define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: splice_i64:
-; CHECK: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1067,8 +1163,9 @@ define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: splice_bf16:
-; CHECK: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                       <vscale x 8 x bfloat> %a,
                                                                       <vscale x 8 x bfloat> %b)
@@ -1077,8 +1174,9 @@ define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
 
 define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: splice_f16:
-; CHECK: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x half> %a,
                                                                    <vscale x 8 x half> %b)
@@ -1087,8 +1185,9 @@ define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
 
 define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: splice_f32:
-; CHECK: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b)
@@ -1097,8 +1196,9 @@ define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
 
 define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: splice_f64:
-; CHECK: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a,
                                                                      <vscale x 2 x double> %b)
@@ -1110,25 +1210,28 @@ define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
 ;
 
 define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: sunpkhi_i16
-; CHECK: sunpkhi z0.h, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpkhi_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpkhi z0.h, z0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: sunpkhi_i32
-; CHECK: sunpkhi z0.s, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpkhi_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpkhi z0.s, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
-; CHECK-LABEL:  sunpkhi_i64
-; CHECK: sunpkhi z0.d, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpkhi_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
   ret <vscale x 2 x i64> %res
 }
@@ -1138,25 +1241,28 @@ define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
 ;
 
 define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: sunpklo_i16
-; CHECK: sunpklo z0.h, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpklo_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: sunpklo_i32
-; CHECK: sunpklo z0.s, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpklo_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
-; CHECK-LABEL:  sunpklo_i64
-; CHECK: sunpklo z0.d, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: sunpklo_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
   ret <vscale x 2 x i64> %res
 }
@@ -1167,8 +1273,9 @@ define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
 
 define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: tbl_i8:
-; CHECK: tbl z0.b, { z0.b }, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1176,8 +1283,9 @@ define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: tbl_i16:
-; CHECK: tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1185,8 +1293,9 @@ define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: tbl_i32:
-; CHECK: tbl z0.s, { z0.s }, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1194,8 +1303,9 @@ define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: tbl_i64:
-; CHECK: tbl z0.d, { z0.d }, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1203,8 +1313,9 @@ define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: tbl_f16:
-; CHECK: tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x half> %out
@@ -1212,8 +1323,9 @@ define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
 ; CHECK-LABEL: tbl_bf16:
-; CHECK: tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x i16> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1221,8 +1333,9 @@ define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i
 
 define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: tbl_f32:
-; CHECK: tbl z0.s, { z0.s }, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 4 x float> %out
@@ -1230,8 +1343,9 @@ define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: tbl_f64:
-; CHECK: tbl z0.d, { z0.d }, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x double> %out
@@ -1242,25 +1356,28 @@ define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i6
 ;
 
 define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: uunpkhi_i16
-; CHECK: uunpkhi z0.h, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpkhi_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: uunpkhi_i32
-; CHECK: uunpkhi z0.s, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpkhi_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
-; CHECK-LABEL:  uunpkhi_i64
-; CHECK: uunpkhi z0.d, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpkhi_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
   ret <vscale x 2 x i64> %res
 }
@@ -1270,25 +1387,28 @@ define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
 ;
 
 define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: uunpklo_i16
-; CHECK: uunpklo z0.h, z0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpklo_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: uunpklo_i32
-; CHECK: uunpklo z0.s, z0.h
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpklo_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
-; CHECK-LABEL:  uunpklo_i64
-; CHECK: uunpklo z0.d, z0.s
-; CHECK-NEXT: ret
+; CHECK-LABEL: uunpklo_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
   ret <vscale x 2 x i64> %res
 }
@@ -1299,8 +1419,9 @@ define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
 
 define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: trn1_b8:
-; CHECK: trn1 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -1308,8 +1429,9 @@ define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: trn1_b16:
-; CHECK: trn1 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -1317,8 +1439,9 @@ define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: trn1_b32:
-; CHECK: trn1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -1326,8 +1449,9 @@ define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: trn1_b64:
-; CHECK: trn1 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -1335,8 +1459,9 @@ define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: trn1_i8:
-; CHECK: trn1 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1344,8 +1469,9 @@ define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: trn1_i16:
-; CHECK: trn1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1353,8 +1479,9 @@ define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: trn1_i32:
-; CHECK: trn1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1362,8 +1489,9 @@ define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: trn1_i64:
-; CHECK: trn1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1371,8 +1499,9 @@ define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: trn1_f16_v2:
-; CHECK: trn1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -1380,8 +1509,9 @@ define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: trn1_f16_v4:
-; CHECK: trn1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -1389,8 +1519,9 @@ define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: trn1_bf16:
-; CHECK: trn1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1398,8 +1529,9 @@ define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: trn1_f16:
-; CHECK: trn1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -1407,8 +1539,9 @@ define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: trn1_f32_v2:
-; CHECK: trn1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -1416,8 +1549,9 @@ define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: trn1_f32:
-; CHECK: trn1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -1425,8 +1559,9 @@ define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: trn1_f64:
-; CHECK: trn1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -1438,8 +1573,9 @@ define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: trn2_b8:
-; CHECK: trn2 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -1447,8 +1583,9 @@ define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: trn2_b16:
-; CHECK: trn2 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -1456,8 +1593,9 @@ define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: trn2_b32:
-; CHECK: trn2 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -1465,8 +1603,9 @@ define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: trn2_b64:
-; CHECK: trn2 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -1474,8 +1613,9 @@ define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: trn2_i8:
-; CHECK: trn2 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1483,8 +1623,9 @@ define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: trn2_i16:
-; CHECK: trn2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1492,8 +1633,9 @@ define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: trn2_i32:
-; CHECK: trn2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1501,8 +1643,9 @@ define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: trn2_i64:
-; CHECK: trn2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1510,8 +1653,9 @@ define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: trn2_f16_v2:
-; CHECK: trn2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -1519,8 +1663,9 @@ define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: trn2_f16_v4:
-; CHECK: trn2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -1528,8 +1673,9 @@ define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: trn2_bf16:
-; CHECK: trn2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1537,8 +1683,9 @@ define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: trn2_f16:
-; CHECK: trn2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -1546,8 +1693,9 @@ define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: trn2_f32_v2:
-; CHECK: trn2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -1555,8 +1703,9 @@ define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: trn2_f32:
-; CHECK: trn2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -1564,8 +1713,9 @@ define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: trn2_f64:
-; CHECK: trn2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -1577,8 +1727,9 @@ define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uzp1_b8:
-; CHECK: uzp1 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -1586,8 +1737,9 @@ define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uzp1_b16:
-; CHECK: uzp1 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -1595,8 +1747,9 @@ define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uzp1_b32:
-; CHECK: uzp1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -1604,8 +1757,9 @@ define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uzp1_b64:
-; CHECK: uzp1 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -1613,8 +1767,9 @@ define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uzp1_i8:
-; CHECK: uzp1 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1622,8 +1777,9 @@ define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uzp1_i16:
-; CHECK: uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1631,8 +1787,9 @@ define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uzp1_i32:
-; CHECK: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1640,8 +1797,9 @@ define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uzp1_i64:
-; CHECK: uzp1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1649,8 +1807,9 @@ define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: uzp1_f16_v2:
-; CHECK: uzp1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -1658,8 +1817,9 @@ define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: uzp1_f16_v4:
-; CHECK: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -1667,8 +1827,9 @@ define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: uzp1_bf16:
-; CHECK: uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1676,8 +1837,9 @@ define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: uzp1_f16:
-; CHECK: uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -1685,8 +1847,9 @@ define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: uzp1_f32_v2:
-; CHECK: uzp1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -1694,8 +1857,9 @@ define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: uzp1_f32:
-; CHECK: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -1703,8 +1867,9 @@ define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: uzp1_f64:
-; CHECK: uzp1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -1716,8 +1881,9 @@ define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uzp2_b8:
-; CHECK: uzp2 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -1725,8 +1891,9 @@ define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uzp2_b16:
-; CHECK: uzp2 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -1734,8 +1901,9 @@ define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uzp2_b32:
-; CHECK: uzp2 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -1743,8 +1911,9 @@ define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uzp2_b64:
-; CHECK: uzp2 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -1752,8 +1921,9 @@ define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uzp2_i8:
-; CHECK: uzp2 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1761,8 +1931,9 @@ define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uzp2_i16:
-; CHECK: uzp2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1770,8 +1941,9 @@ define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uzp2_i32:
-; CHECK: uzp2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1779,8 +1951,9 @@ define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uzp2_i64:
-; CHECK: uzp2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1788,8 +1961,9 @@ define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: uzp2_f16_v2:
-; CHECK: uzp2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -1797,8 +1971,9 @@ define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: uzp2_f16_v4:
-; CHECK: uzp2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -1806,8 +1981,9 @@ define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: uzp2_bf16:
-; CHECK: uzp2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1815,8 +1991,9 @@ define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: uzp2_f16:
-; CHECK: uzp2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -1824,8 +2001,9 @@ define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: uzp2_f32_v2:
-; CHECK: uzp2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -1833,8 +2011,9 @@ define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: uzp2_f32:
-; CHECK: uzp2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -1842,8 +2021,9 @@ define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: uzp2_f64:
-; CHECK: uzp2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -1855,8 +2035,9 @@ define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: zip1_b8:
-; CHECK: zip1 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -1864,8 +2045,9 @@ define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: zip1_b16:
-; CHECK: zip1 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -1873,8 +2055,9 @@ define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: zip1_b32:
-; CHECK: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -1882,8 +2065,9 @@ define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: zip1_b64:
-; CHECK: zip1 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -1891,8 +2075,9 @@ define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: zip1_i8:
-; CHECK: zip1 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -1900,8 +2085,9 @@ define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: zip1_i16:
-; CHECK: zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -1909,8 +2095,9 @@ define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: zip1_i32:
-; CHECK: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -1918,8 +2105,9 @@ define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: zip1_i64:
-; CHECK: zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -1927,8 +2115,9 @@ define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: zip1_f16_v2:
-; CHECK: zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -1936,8 +2125,9 @@ define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: zip1_f16_v4:
-; CHECK: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -1945,8 +2135,9 @@ define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: zip1_bf16:
-; CHECK: zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -1954,8 +2145,9 @@ define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: zip1_f16:
-; CHECK: zip1 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -1963,8 +2155,9 @@ define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: zip1_f32_v2:
-; CHECK: zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -1972,8 +2165,9 @@ define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: zip1_f32:
-; CHECK: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -1981,8 +2175,9 @@ define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: zip1_f64:
-; CHECK: zip1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
@@ -1994,8 +2189,9 @@ define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
 
 define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: zip2_b8:
-; CHECK: zip2 p0.b, p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a,
                                                                 <vscale x 16 x i1> %b)
   ret <vscale x 16 x i1> %out
@@ -2003,8 +2199,9 @@ define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
 
 define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: zip2_b16:
-; CHECK: zip2 p0.h, p0.h, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a,
                                                               <vscale x 8 x i1> %b)
   ret <vscale x 8 x i1> %out
@@ -2012,8 +2209,9 @@ define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 
 define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: zip2_b32:
-; CHECK: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a,
                                                               <vscale x 4 x i1> %b)
   ret <vscale x 4 x i1> %out
@@ -2021,8 +2219,9 @@ define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 
 define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: zip2_b64:
-; CHECK: zip2 p0.d, p0.d, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a,
                                                               <vscale x 2 x i1> %b)
   ret <vscale x 2 x i1> %out
@@ -2030,8 +2229,9 @@ define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 
 define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: zip2_i8:
-; CHECK: zip2 z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -2039,8 +2239,9 @@ define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: zip2_i16:
-; CHECK: zip2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -2048,8 +2249,9 @@ define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: zip2_i32:
-; CHECK: zip2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -2057,8 +2259,9 @@ define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: zip2_i64:
-; CHECK: zip2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -2066,8 +2269,9 @@ define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: zip2_f16_v2:
-; CHECK: zip2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a,
                                                                  <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %out
@@ -2075,8 +2279,9 @@ define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x ha
 
 define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: zip2_f16_v4:
-; CHECK: zip2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a,
                                                                  <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %out
@@ -2084,8 +2289,9 @@ define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x ha
 
 define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: zip2_bf16:
-; CHECK: zip2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %out
@@ -2093,8 +2299,9 @@ define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 
 define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: zip2_f16:
-; CHECK: zip2 z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
@@ -2102,8 +2309,9 @@ define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 
 define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: zip2_f32_v2:
-; CHECK: zip2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a,
                                                                   <vscale x 2 x float> %b)
   ret <vscale x 2 x float> %out
@@ -2111,8 +2319,9 @@ define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x
 
 define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: zip2_f32:
-; CHECK: zip2 z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
@@ -2120,8 +2329,9 @@ define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x flo
 
 define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: zip2_f64:
-; CHECK: zip2 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll
index 43749ad37d8c8..d549597a6a6d9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,32 +8,36 @@
 
 define <vscale x 16 x i1> @ptrue_b8() {
 ; CHECK-LABEL: ptrue_b8:
-; CHECK: ptrue p0.b, pow2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, pow2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @ptrue_b16() {
 ; CHECK-LABEL: ptrue_b16:
-; CHECK: ptrue p0.h, vl1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h, vl1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 1)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @ptrue_b32() {
 ; CHECK-LABEL: ptrue_b32:
-; CHECK: ptrue p0.s, mul3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, mul3
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 30)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @ptrue_b64() {
 ; CHECK-LABEL: ptrue_b64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   ret <vscale x 2 x i1> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
index 8341e8f68aca6..360afcf895dc7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i1> @brka_m_b8(<vscale x 16 x i1> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: brka_m_b8:
-; CHECK: brka p0.b, p1/m, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brka p0.b, p1/m, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brka.nxv16i1(<vscale x 16 x i1> %inactive,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i1> %a)
@@ -17,8 +19,9 @@ define <vscale x 16 x i1> @brka_m_b8(<vscale x 16 x i1> %inactive, <vscale x 16
 
 define <vscale x 16 x i1> @brka_z_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: brka_z_b8:
-; CHECK: brka p0.b, p0/z, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brka p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i1> %a)
   ret <vscale x 16 x i1> %out
@@ -30,8 +33,9 @@ define <vscale x 16 x i1> @brka_z_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1>
 
 define <vscale x 16 x i1> @brkb_m_b8(<vscale x 16 x i1> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: brkb_m_b8:
-; CHECK: brkb p0.b, p1/m, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brkb p0.b, p1/m, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.nxv16i1(<vscale x 16 x i1> %inactive,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i1> %a)
@@ -40,8 +44,9 @@ define <vscale x 16 x i1> @brkb_m_b8(<vscale x 16 x i1> %inactive, <vscale x 16
 
 define <vscale x 16 x i1> @brkb_z_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: brkb_z_b8:
-; CHECK: brkb p0.b, p0/z, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brkb p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i1> %a)
   ret <vscale x 16 x i1> %out
@@ -53,9 +58,10 @@ define <vscale x 16 x i1> @brkb_z_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1>
 
 define <vscale x 16 x i1> @brkn_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: brkn_b8:
-; CHECK: brkn p2.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: mov p0.b, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brkn p2.b, p0/z, p1.b, p2.b
+; CHECK-NEXT:    mov p0.b, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i1> %a,
                                                                   <vscale x 16 x i1> %b)
@@ -68,8 +74,9 @@ define <vscale x 16 x i1> @brkn_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a
 
 define <vscale x 16 x i1> @brkpa_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: brkpa_b8:
-; CHECK: brkpa p0.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brkpa p0.b, p0/z, p1.b, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i1> %a,
                                                                    <vscale x 16 x i1> %b)
@@ -82,8 +89,9 @@ define <vscale x 16 x i1> @brkpa_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %
 
 define <vscale x 16 x i1> @brkpb_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: brkpb_b8:
-; CHECK: brkpb p0.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    brkpb p0.b, p0/z, p1.b, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i1> %a,
                                                                    <vscale x 16 x i1> %b)
@@ -96,9 +104,10 @@ define <vscale x 16 x i1> @brkpb_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %
 
 define <vscale x 16 x i1> @pfirst_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: pfirst_b8:
-; CHECK: pfirst p1.b, p0, p1.b
-; CHECK-NEXT: mov p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfirst p1.b, p0, p1.b
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.pfirst.nxv16i1(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i1> %a)
   ret <vscale x 16 x i1> %out
@@ -110,9 +119,10 @@ define <vscale x 16 x i1> @pfirst_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1>
 
 define <vscale x 16 x i1> @pnext_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: pnext_b8:
-; CHECK: pnext p1.b, p0, p1.b
-; CHECK-NEXT: mov p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pnext p1.b, p0, p1.b
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.pnext.nxv16i1(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i1> %a)
   ret <vscale x 16 x i1> %out
@@ -120,9 +130,10 @@ define <vscale x 16 x i1> @pnext_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %
 
 define <vscale x 8 x i1> @pnext_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
 ; CHECK-LABEL: pnext_b16:
-; CHECK: pnext p1.h, p0, p1.h
-; CHECK-NEXT: mov p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pnext p1.h, p0, p1.h
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i1> %a)
   ret <vscale x 8 x i1> %out
@@ -130,9 +141,10 @@ define <vscale x 8 x i1> @pnext_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a)
 
 define <vscale x 4 x i1> @pnext_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
 ; CHECK-LABEL: pnext_b32:
-; CHECK: pnext p1.s, p0, p1.s
-; CHECK-NEXT: mov p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pnext p1.s, p0, p1.s
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.pnext.nxv4i1(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i1> %a)
   ret <vscale x 4 x i1> %out
@@ -140,9 +152,10 @@ define <vscale x 4 x i1> @pnext_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a)
 
 define <vscale x 2 x i1> @pnext_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
 ; CHECK-LABEL: pnext_b64:
-; CHECK: pnext p1.d, p0, p1.d
-; CHECK-NEXT: mov p0.b, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pnext p1.d, p0, p1.d
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.pnext.nxv2i1(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i1> %a)
   ret <vscale x 2 x i1> %out
@@ -153,25 +166,28 @@ define <vscale x 2 x i1> @pnext_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a)
 ;
 
 define <vscale x 8 x i1> @punpkhi_b16(<vscale x 16 x i1> %a) {
-; CHECK-LABEL: punpkhi_b16
-; CHECK: punpkhi p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpkhi_b16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1> %a)
   ret <vscale x 8 x i1> %res
 }
 
 define <vscale x 4 x i1> @punpkhi_b8(<vscale x 8 x i1> %a) {
-; CHECK-LABEL: punpkhi_b8
-; CHECK: punpkhi p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpkhi_b8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i1> @llvm.aarch64.sve.punpkhi.nxv4i1(<vscale x 8 x i1> %a)
   ret <vscale x 4 x i1> %res
 }
 
 define <vscale x 2 x i1> @punpkhi_b4(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: punpkhi_b4
-; CHECK: punpkhi p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpkhi_b4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i1> @llvm.aarch64.sve.punpkhi.nxv2i1(<vscale x 4 x i1> %a)
   ret <vscale x 2 x i1> %res
 }
@@ -181,25 +197,28 @@ define <vscale x 2 x i1> @punpkhi_b4(<vscale x 4 x i1> %a) {
 ;
 
 define <vscale x 8 x i1> @punpklo_b16(<vscale x 16 x i1> %a) {
-; CHECK-LABEL: punpklo_b16
-; CHECK: punpklo p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpklo_b16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i1> @llvm.aarch64.sve.punpklo.nxv8i1(<vscale x 16 x i1> %a)
   ret <vscale x 8 x i1> %res
 }
 
 define <vscale x 4 x i1> @punpklo_b8(<vscale x 8 x i1> %a) {
-; CHECK-LABEL: punpklo_b8
-; CHECK: punpklo p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpklo_b8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i1> @llvm.aarch64.sve.punpklo.nxv4i1(<vscale x 8 x i1> %a)
   ret <vscale x 4 x i1> %res
 }
 
 define <vscale x 2 x i1> @punpklo_b4(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: punpklo_b4
-; CHECK: punpklo p0.h, p0.b
-; CHECK-NEXT: ret
+; CHECK-LABEL: punpklo_b4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i1> @llvm.aarch64.sve.punpklo.nxv2i1(<vscale x 4 x i1> %a)
   ret <vscale x 2 x i1> %res
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
index 58d4e9e944710..d38d3e3f01981 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,27 +8,30 @@
 
 define i1 @ptest_any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: ptest_any:
-; CHECK: ptest p0, p1.b
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %out = call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
   ret i1 %out
 }
 
 define i1 @ptest_first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: ptest_first:
-; CHECK: ptest p0, p1.b
-; CHECK-NEXT: cset w0, mi
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
   %out = call i1 @llvm.aarch64.sve.ptest.first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
   ret i1 %out
 }
 
 define i1 @ptest_last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: ptest_last:
-; CHECK: ptest p0, p1.b
-; CHECK-NEXT: cset w0, lo
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
   %out = call i1 @llvm.aarch64.sve.ptest.last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
   ret i1 %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll
index 73512a6bb5cc4..3ee23707cf29c 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @rbit_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: rbit_i8:
-; CHECK: rbit z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @rbit_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg
 
 define <vscale x 8 x i16> @rbit_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: rbit_i16:
-; CHECK: rbit z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @rbit_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @rbit_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: rbit_i32:
-; CHECK: rbit z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @rbit_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @rbit_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: rbit_i64:
-; CHECK: rbit z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @rbit_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 8 x i16> @revb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: revb_i16:
-; CHECK: revb z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
@@ -61,8 +67,9 @@ define <vscale x 8 x i16> @revb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg
 
 define <vscale x 4 x i32> @revb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: revb_i32:
-; CHECK: revb z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -71,8 +78,9 @@ define <vscale x 4 x i32> @revb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @revb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: revb_i64:
-; CHECK: revb z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -85,8 +93,9 @@ define <vscale x 2 x i64> @revb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 4 x i32> @revh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: revh_i32:
-; CHECK: revh z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revh z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
@@ -95,8 +104,9 @@ define <vscale x 4 x i32> @revh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg
 
 define <vscale x 2 x i64> @revh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: revh_i64:
-; CHECK: revh z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revh z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
@@ -109,8 +119,9 @@ define <vscale x 2 x i64> @revh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg
 
 define <vscale x 2 x i64> @revw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: revw_i64:
-; CHECK: revw z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revw z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
index b6b1e3f3aeb0b..2d644688a5359 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @dup_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, i8 %b) {
 ; CHECK-LABEL: dup_i8:
-; CHECK: mov z0.b, p0/m, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/m, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i1> %pg,
                                                                i8 %b)
@@ -16,8 +18,9 @@ define <vscale x 16 x i8> @dup_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg,
 
 define <vscale x 8 x i16> @dup_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, i16 %b) {
 ; CHECK-LABEL: dup_i16:
-; CHECK: mov z0.h, p0/m, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/m, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i1> %pg,
                                                                i16 %b)
@@ -26,8 +29,9 @@ define <vscale x 8 x i16> @dup_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg,
 
 define <vscale x 4 x i32> @dup_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, i32 %b) {
 ; CHECK-LABEL: dup_i32:
-; CHECK: mov z0.s, p0/m, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, p0/m, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i1> %pg,
                                                                i32 %b)
@@ -36,8 +40,9 @@ define <vscale x 4 x i32> @dup_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,
 
 define <vscale x 2 x i64> @dup_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, i64 %b) {
 ; CHECK-LABEL: dup_i64:
-; CHECK: mov z0.d, p0/m, x0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/m, x0
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i1> %pg,
                                                                i64 %b)
@@ -46,8 +51,9 @@ define <vscale x 2 x i64> @dup_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
 
 define <vscale x 8 x half> @dup_f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, half %b) {
 ; CHECK-LABEL: dup_f16:
-; CHECK: mov z0.h, p0/m, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 half %b)
@@ -56,8 +62,9 @@ define <vscale x 8 x half> @dup_f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %p
 
 define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
 ; CHECK-LABEL: dup_bf16:
-; CHECK: mov z0.h, p0/m, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x i1> %pg,
                                                                    bfloat %b)
@@ -66,8 +73,9 @@ define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i
 
 define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
 ; CHECK-LABEL: dup_f32:
-; CHECK: mov z0.s, p0/m, s1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, p0/m, s1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  float %b)
@@ -76,8 +84,9 @@ define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1>
 
 define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, double %b) {
 ; CHECK-LABEL: dup_f64:
-; CHECK: mov z0.d, p0/m, d1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/m, d1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   double %b)
@@ -86,18 +95,20 @@ define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1
 
 define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
 ; CHECK-LABEL: test_svdup_n_bf16_z:
-; CHECK: mov z1.h, #0
-; CHECK: mov z1.h, p0/m, h0
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #0 // =0x0
+; CHECK-NEXT:    mov z1.h, p0/m, h0
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
 ; CHECK-LABEL: test_svdup_n_bf16_m:
-; CHECK: mov z0.h, p0/m, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
   ret <vscale x 8 x bfloat> %out
 }
@@ -105,8 +116,9 @@ define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactiv
 
 define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
 ; CHECK-LABEL: test_svdup_n_bf16_x:
-; CHECK: mov z0.h, p0/m, h0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
   ret <vscale x 8 x bfloat> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll
index 380a158b767f6..a2d87766af36f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-scaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; ST1H
 define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1h_s_uxtw:
-; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
                                                              <vscale x 4 x i1> %pg,
@@ -21,8 +23,9 @@ define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16*
 
 define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1h_s_sxtw:
-; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
                                                              <vscale x 4 x i1> %pg,
@@ -33,8 +36,9 @@ define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16*
 
 define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1h_d_uxtw:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                              <vscale x 2 x i1> %pg,
@@ -45,8 +49,9 @@ define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16*
 
 define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1h_d_sxtw:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                              <vscale x 2 x i1> %pg,
@@ -58,8 +63,9 @@ define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16*
 ; ST1W
 define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1w_s_uxtw:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> %data,
                                                              <vscale x 4 x i1> %pg,
                                                              i32* %base,
@@ -69,8 +75,9 @@ define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32*
 
 define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1w_s_sxtw:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> %data,
                                                              <vscale x 4 x i1> %pg,
                                                              i32* %base,
@@ -80,8 +87,9 @@ define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32*
 
 define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1w_d_uxtw:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                              <vscale x 2 x i1> %pg,
@@ -92,8 +100,9 @@ define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32*
 
 define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1w_d_sxtw:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                              <vscale x 2 x i1> %pg,
@@ -104,8 +113,9 @@ define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32*
 
 define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1w_s_uxtw_float:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> %data,
                                                              <vscale x 4 x i1> %pg,
                                                              float* %base,
@@ -115,8 +125,9 @@ define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %p
 
 define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
 ; CHECK-LABEL: sst1w_s_sxtw_float:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> %data,
                                                              <vscale x 4 x i1> %pg,
                                                              float* %base,
@@ -127,8 +138,9 @@ define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %p
 ; ST1D
 define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1d_d_uxtw:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i64(<vscale x 2 x i64> %data,
                                                              <vscale x 2 x i1> %pg,
                                                              i64* %base,
@@ -138,8 +150,9 @@ define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 
 define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1d_d_sxtw:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i64(<vscale x 2 x i64> %data,
                                                              <vscale x 2 x i1> %pg,
                                                              i64* %base,
@@ -149,8 +162,9 @@ define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 
 define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1d_d_uxtw_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2f64(<vscale x 2 x double> %data,
                                                              <vscale x 2 x i1> %pg,
                                                              double* %base,
@@ -160,8 +174,9 @@ define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1>
 
 define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
 ; CHECK-LABEL: sst1d_d_sxtw_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2f64(<vscale x 2 x double> %data,
                                                              <vscale x 2 x i1> %pg,
                                                              double* %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll
index 52f988e8abc18..c7dfc62f7cd12 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-32bit-unscaled-offsets.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; ST1B
 define void @sst1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1b_s_uxtw:
-; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void  @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
                                                        <vscale x 4 x i1> %pg,
@@ -21,8 +23,9 @@ define void @sst1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %
 
 define void @sst1b_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1b_s_sxtw:
-; CHECK: st1b { z0.s }, p0, [x0, z1.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, z1.s, sxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
                                                       <vscale x 4 x i1> %pg,
@@ -33,8 +36,9 @@ define void @sst1b_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %
 
 define void @sst1b_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1b_d_uxtw:
-; CHECK: st1b { z0.d }, p0, [x0, z1.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
                                                       <vscale x 2 x i1> %pg,
@@ -45,8 +49,9 @@ define void @sst1b_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %
 
 define void @sst1b_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1b_d_sxtw:
-; CHECK: st1b { z0.d }, p0, [x0, z1.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d, sxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void  @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
                                                        <vscale x 2 x i1> %pg,
@@ -58,8 +63,9 @@ define void @sst1b_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %
 ; ST1H
 define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1h_s_uxtw:
-; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
                                                        <vscale x 4 x i1> %pg,
@@ -70,8 +76,9 @@ define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16*
 
 define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1h_s_sxtw:
-; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
                                                        <vscale x 4 x i1> %pg,
@@ -82,8 +89,9 @@ define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16*
 
 define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1h_d_uxtw:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                        <vscale x 2 x i1> %pg,
@@ -94,8 +102,9 @@ define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16*
 
 define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1h_d_sxtw:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, sxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                        <vscale x 2 x i1> %pg,
@@ -107,8 +116,9 @@ define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16*
 ; ST1W
 define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_s_uxtw:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data,
                                                        <vscale x 4 x i1> %pg,
                                                        i32* %base,
@@ -118,8 +128,9 @@ define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32*
 
 define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_s_sxtw:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> %data,
                                                        <vscale x 4 x i1> %pg,
                                                        i32* %base,
@@ -129,8 +140,9 @@ define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32*
 
 define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_d_uxtw:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                        <vscale x 2 x i1> %pg,
@@ -141,8 +153,9 @@ define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32*
 
 define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_d_sxtw:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, sxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                        <vscale x 2 x i1> %pg,
@@ -153,8 +166,9 @@ define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32*
 
 define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_s_uxtw_float:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data,
                                                        <vscale x 4 x i1> %pg,
                                                        float* %base,
@@ -164,8 +178,9 @@ define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %p
 
 define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sst1w_s_sxtw_float:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> %data,
                                                        <vscale x 4 x i1> %pg,
                                                        float* %base,
@@ -176,8 +191,9 @@ define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %p
 ; ST1D
 define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1d_d_uxtw:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i64(<vscale x 2 x i64> %data,
                                                        <vscale x 2 x i1> %pg,
                                                        i64* %base,
@@ -187,8 +203,9 @@ define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 
 define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1d_d_sxtw:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i64(<vscale x 2 x i64> %data,
                                                        <vscale x 2 x i1> %pg,
                                                        i64* %base,
@@ -198,8 +215,9 @@ define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 
 define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1d_d_uxtw_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2f64(<vscale x 2 x double> %data,
                                                        <vscale x 2 x i1> %pg,
                                                        double* %base,
@@ -209,8 +227,9 @@ define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1>
 
 define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
 ; CHECK-LABEL: sst1d_d_sxtw_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2f64(<vscale x 2 x double> %data,
                                                        <vscale x 2 x i1> %pg,
                                                        double* %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll
index ca81fe14e13a9..b9096917a9f54 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-scaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 ;
 
 define void @sst1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sst1h_index
-; CHECK:	    st1h	{ z0.d }, p0, [x0, z1.d, lsl #1]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: sst1h_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, lsl #1]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                         <vscale x 2 x i1> %pg,
@@ -18,9 +20,10 @@ define void @sst1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %
 }
 
 define void @sst1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sst1w_index
-; CHECK:	    st1w	{ z0.d }, p0, [x0, z1.d, lsl #2]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: sst1w_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, lsl #2]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                         <vscale x 2 x i1> %pg,
@@ -30,9 +33,10 @@ define void @sst1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %
 }
 
 define void  @sst1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sst1d_index
-; CHECK:	    st1d	{ z0.d }, p0, [x0, z1.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: sst1d_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, lsl #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> %data,
                                                         <vscale x 2 x i1> %pg,
                                                         i64* %base,
@@ -41,9 +45,10 @@ define void  @sst1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 }
 
 define void  @sst1d_index_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sst1d_index_double
-; CHECK:	    st1d	{ z0.d }, p0, [x0, z1.d, lsl #3]
-; CHECK-NEXT:	ret
+; CHECK-LABEL: sst1d_index_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, lsl #3]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %data,
                                                         <vscale x 2 x i1> %pg,
                                                         double* %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll
index 00e72a5d470e7..f4c4402dbea71 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-64bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -7,8 +8,9 @@
 
 define void @sst1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sst1b_d:
-; CHECK: st1b { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> %data_trunc,
                                                  <vscale x 2 x i1> %pg,
@@ -19,8 +21,9 @@ define void @sst1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base,
 
 define void @sst1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sst1h_d:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                  <vscale x 2 x i1> %pg,
@@ -31,8 +34,9 @@ define void @sst1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base
 
 define void @sst1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sst1w_d:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                  <vscale x 2 x i1> %pg,
@@ -43,8 +47,9 @@ define void @sst1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base
 
 define void @sst1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sst1d_d:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> %data,
                                                  <vscale x 2 x i1> %pg,
                                                  i64* %base,
@@ -54,8 +59,9 @@ define void @sst1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base
 
 define void @sst1d_d_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sst1d_d_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> %data,
                                                  <vscale x 2 x i1> %pg,
                                                  double* %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll
index 176c0b2fd271b..797b8c18e02ea 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; ST1B
 define void @sst1b_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1b_s_imm_offset:
-; CHECK: st1b { z0.s }, p0, [z1.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [z1.s, #16]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
                                                                        <vscale x 4 x i1> %pg,
@@ -20,8 +22,9 @@ define void @sst1b_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg,
 
 define void @sst1b_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1b_d_imm_offset:
-; CHECK: st1b { z0.d }, p0, [z1.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [z1.d, #16]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
                                                                        <vscale x 2 x i1> %pg,
@@ -33,8 +36,9 @@ define void @sst1b_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg,
 ; ST1H
 define void @sst1h_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1h_s_imm_offset:
-; CHECK: st1h { z0.s }, p0, [z1.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [z1.s, #16]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
                                                                         <vscale x 4 x i1> %pg,
@@ -45,8 +49,9 @@ define void @sst1h_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg,
 
 define void @sst1h_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1h_d_imm_offset:
-; CHECK: st1h { z0.d }, p0, [z1.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d, #16]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -58,8 +63,9 @@ define void @sst1h_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg,
 ; ST1W
 define void @sst1w_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1w_s_imm_offset:
-; CHECK: st1w { z0.s }, p0, [z1.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [z1.s, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -69,8 +75,9 @@ define void @sst1w_s_imm_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg,
 
 define void @sst1w_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1w_d_imm_offset:
-; CHECK: st1w { z0.d }, p0, [z1.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [z1.d, #16]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -81,8 +88,9 @@ define void @sst1w_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg,
 
 define void @sst1w_s_imm_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1w_s_imm_offset_float:
-; CHECK: st1w { z0.s }, p0, [z1.s, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [z1.s, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -93,8 +101,9 @@ define void @sst1w_s_imm_offset_float(<vscale x 4 x float> %data, <vscale x 4 x
 ; ST1D
 define void @sst1d_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1d_d_imm_offset:
-; CHECK: st1d { z0.d }, p0, [z1.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,
@@ -104,8 +113,9 @@ define void @sst1d_d_imm_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg,
 
 define void @sst1d_d_imm_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1d_d_imm_offset_double:
-; CHECK: st1d { z0.d }, p0, [z1.d, #16]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d, #16]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,
@@ -121,9 +131,10 @@ define void @sst1d_d_imm_offset_double(<vscale x 2 x double> %data, <vscale x 2
 ; ST1B
 define void @sst1b_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1b_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    st1b { z0.s }, p0, [x8, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
                                                                        <vscale x 4 x i1> %pg,
@@ -134,9 +145,10 @@ define void @sst1b_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x
 
 define void @sst1b_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1b_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #32
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
                                                                        <vscale x 2 x i1> %pg,
@@ -148,9 +160,10 @@ define void @sst1b_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x
 ; ST1H
 define void @sst1h_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1h_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    st1h { z0.s }, p0, [x8, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
                                                                         <vscale x 4 x i1> %pg,
@@ -161,9 +174,10 @@ define void @sst1h_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x
 
 define void @sst1h_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1h_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #63
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #63
+; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -175,9 +189,10 @@ define void @sst1h_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x
 ; ST1W
 define void @sst1w_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1w_s_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -187,9 +202,10 @@ define void @sst1w_s_imm_offset_out_of_range(<vscale x 4 x i32> %data, <vscale x
 
 define void @sst1w_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1w_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -200,9 +216,10 @@ define void @sst1w_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x
 
 define void @sst1w_s_imm_offset_float_out_of_range(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
 ; CHECK-LABEL: sst1w_s_imm_offset_float_out_of_range:
-; CHECK: mov	w8, #125
-; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #125
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -213,9 +230,10 @@ define void @sst1w_s_imm_offset_float_out_of_range(<vscale x 4 x float> %data, <
 ; ST1D
 define void @sst1d_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1d_d_imm_offset_out_of_range:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,
@@ -225,9 +243,10 @@ define void @sst1d_d_imm_offset_out_of_range(<vscale x 2 x i64> %data, <vscale x
 
 define void @sst1d_d_imm_offset_double_out_of_range(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
 ; CHECK-LABEL: sst1d_d_imm_offset_double_out_of_range:
-; CHECK: mov	w8, #249
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #249
+; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll
index 59f098de4e899..41c7438a829e2 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; ST1B
 define void @sst1b_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: sst1b_s_scalar_offset:
-; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
                                                                        <vscale x 4 x i1> %pg,
@@ -20,8 +22,9 @@ define void @sst1b_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %
 
 define void @sst1b_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: sst1b_d_scalar_offset:
-; CHECK: st1b { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
                                                                        <vscale x 2 x i1> %pg,
@@ -33,8 +36,9 @@ define void @sst1b_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %
 ; ST1H
 define void @sst1h_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: sst1h_s_scalar_offset:
-; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
                                                                         <vscale x 4 x i1> %pg,
@@ -45,8 +49,9 @@ define void @sst1h_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %
 
 define void @sst1h_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: sst1h_d_scalar_offset:
-; CHECK: st1h { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -58,8 +63,9 @@ define void @sst1h_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %
 ; ST1W
 define void @sst1w_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: sst1w_s_scalar_offset:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -69,8 +75,9 @@ define void @sst1w_s_scalar_offset(<vscale x 4 x i32> %data, <vscale x 4 x i1> %
 
 define void @sst1w_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: sst1w_d_scalar_offset:
-; CHECK: st1w { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
                                                                         <vscale x 2 x i1> %pg,
@@ -81,8 +88,9 @@ define void @sst1w_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %
 
 define void @sst1w_s_scalar_offset_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: sst1w_s_scalar_offset_float:
-; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
                                                                         <vscale x 4 x i1> %pg,
                                                                         <vscale x 4 x i32> %base,
@@ -93,8 +101,9 @@ define void @sst1w_s_scalar_offset_float(<vscale x 4 x float> %data, <vscale x 4
 ; ST1D
 define void @sst1d_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: sst1d_d_scalar_offset:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,
@@ -104,8 +113,9 @@ define void @sst1d_d_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i1> %
 
 define void @sst1d_d_scalar_offset_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: sst1d_d_scalar_offset_double:
-; CHECK: st1d { z0.d }, p0, [x0, z1.d]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
                                                                         <vscale x 2 x i1> %pg,
                                                                         <vscale x 2 x i64> %base,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll
index 55201f6745cc0..8b47bc18b4b51 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i1> @sel_i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sel_i1:
-; CHECK: sel p0.b, p0, p1.b, p2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel p0.b, p0, p1.b, p2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
@@ -17,8 +19,9 @@ define <vscale x 16 x i1> @sel_i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a,
 
 define <vscale x 16 x i8> @sel_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sel_i8:
-; CHECK: sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sel.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
@@ -27,8 +30,9 @@ define <vscale x 16 x i8> @sel_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a,
 
 define <vscale x 8 x i16> @sel_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sel_i16:
-; CHECK: sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sel.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
@@ -37,8 +41,9 @@ define <vscale x 8 x i16> @sel_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i32> @sel_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sel_i32:
-; CHECK: sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
@@ -47,8 +52,9 @@ define <vscale x 4 x i32> @sel_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i64> @sel_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sel_i64:
-; CHECK: sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sel.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
@@ -57,8 +63,9 @@ define <vscale x 2 x i64> @sel_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 8 x bfloat> @sel_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
 ; CHECK-LABEL: sel_bf16:
-; CHECK: sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.sel.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
@@ -67,8 +74,9 @@ define <vscale x 8 x bfloat> @sel_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bflo
 
 define <vscale x 8 x half> @sel_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: sel_f16:
-; CHECK: sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
@@ -77,8 +85,9 @@ define <vscale x 8 x half> @sel_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %
 
 define <vscale x 4 x float> @sel_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: sel_f32:
-; CHECK: sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
@@ -87,8 +96,9 @@ define <vscale x 4 x float> @sel_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float>
 
 define <vscale x 2 x double> @sel_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: sel_f64:
-; CHECK: sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
index b2cd681742474..2324e3074a420 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 
 define <vscale x 16 x i8> @asr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: asr_i8_zero:
-; CHECK:      movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                           <vscale x 16 x i8> %a_z,
@@ -18,9 +20,10 @@ define <vscale x 16 x i8> @asr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @asr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: asr_i16_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                           <vscale x 8 x i16> %a_z,
@@ -30,9 +33,10 @@ define <vscale x 8 x i16> @asr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @asr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: asr_i32_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                           <vscale x 4 x i32> %a_z,
@@ -42,9 +46,10 @@ define <vscale x 4 x i32> @asr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @asr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_i64_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                           <vscale x 2 x i64> %a_z,
@@ -54,8 +59,11 @@ define <vscale x 2 x i64> @asr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
 
 define <vscale x 16 x i8> @asr_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i8_zero:
-; CHECK-NOT:  movprfx
-; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.b, #0 // =0x0
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
@@ -65,8 +73,11 @@ define <vscale x 16 x i8> @asr_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16
 
 define <vscale x 8 x i16> @asr_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i16_zero:
-; CHECK-NOT:  movprfx
-; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.h, #0 // =0x0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
@@ -76,8 +87,11 @@ define <vscale x 8 x i16> @asr_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8
 
 define <vscale x 4 x i32> @asr_wide_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i32_zero:
-; CHECK-NOT:  movprfx
-; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #0 // =0x0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
@@ -91,9 +105,10 @@ define <vscale x 4 x i32> @asr_wide_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 16 x i8> @asrd_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: asrd_i8_zero:
-; CHECK:      movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                            <vscale x 16 x i8> %a_z,
@@ -103,9 +118,10 @@ define <vscale x 16 x i8> @asrd_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i
 
 define <vscale x 8 x i16> @asrd_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: asrd_i16_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                            <vscale x 8 x i16> %a_z,
@@ -115,9 +131,10 @@ define <vscale x 8 x i16> @asrd_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i1
 
 define <vscale x 4 x i32> @asrd_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: asrd_i32_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
                                                            <vscale x 4 x i32> %a_z,
@@ -127,9 +144,10 @@ define <vscale x 4 x i32> @asrd_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i3
 
 define <vscale x 2 x i64> @asrd_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: asrd_i64_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #64
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
                                                            <vscale x 2 x i64> %a_z,
@@ -143,9 +161,10 @@ define <vscale x 2 x i64> @asrd_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i6
 
 define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: lsl_i8_zero:
-; CHECK:      movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                           <vscale x 16 x i8> %a_z,
@@ -155,9 +174,10 @@ define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: lsl_i16_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                           <vscale x 8 x i16> %a_z,
@@ -167,9 +187,10 @@ define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: lsl_i32_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                           <vscale x 4 x i32> %a_z,
@@ -179,9 +200,10 @@ define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_i64_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                           <vscale x 2 x i64> %a_z,
@@ -191,8 +213,11 @@ define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
 
 define <vscale x 16 x i8> @lsl_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i8_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsl z0.b, p0/m, z0.b, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.b, #0 // =0x0
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
@@ -202,8 +227,11 @@ define <vscale x 16 x i8> @lsl_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16
 
 define <vscale x 8 x i16> @lsl_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i16_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsl z0.h, p0/m, z0.h, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.h, #0 // =0x0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
@@ -213,8 +241,11 @@ define <vscale x 8 x i16> @lsl_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8
 
 define <vscale x 4 x i32> @lsl_wide_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i32_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsl z0.s, p0/m, z0.s, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #0 // =0x0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
@@ -228,9 +259,10 @@ define <vscale x 4 x i32> @lsl_wide_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 16 x i8> @lsr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: lsr_i8_zero:
-; CHECK:      movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                           <vscale x 16 x i8> %a_z,
@@ -240,9 +272,10 @@ define <vscale x 16 x i8> @lsr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @lsr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: lsr_i16_zero:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                           <vscale x 8 x i16> %a_z,
@@ -252,9 +285,10 @@ define <vscale x 8 x i16> @lsr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @lsr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: lsr_i32_zero:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                           <vscale x 4 x i32> %a_z,
@@ -264,9 +298,10 @@ define <vscale x 4 x i32> @lsr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @lsr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i64_zero:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                           <vscale x 2 x i64> %a_z,
@@ -276,8 +311,11 @@ define <vscale x 2 x i64> @lsr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
 
 define <vscale x 16 x i8> @lsr_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i8_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsr z0.b, p0/m, z0.b, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.b, #0 // =0x0
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
@@ -287,8 +325,11 @@ define <vscale x 16 x i8> @lsr_wide_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16
 
 define <vscale x 8 x i16> @lsr_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i16_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsr z0.h, p0/m, z0.h, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.h, #0 // =0x0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
@@ -298,8 +339,11 @@ define <vscale x 8 x i16> @lsr_wide_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8
 
 define <vscale x 4 x i32> @lsr_wide_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i32_zero:
-; CHECK-NOT:  movprfx
-; CHECK: lsr z0.s, p0/m, z0.s, z1.d
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #0 // =0x0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
index c119f4b10f074..310fba4778025 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: asr_i8:
-; CHECK: asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
@@ -16,8 +18,9 @@ define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a,
 
 define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: asr_i16:
-; CHECK: asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
@@ -26,8 +29,9 @@ define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: asr_i32:
-; CHECK: asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
@@ -36,8 +40,9 @@ define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_i64:
-; CHECK: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
@@ -46,8 +51,9 @@ define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i8:
-; CHECK: asr z0.b, p0/m, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -56,8 +62,9 @@ define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i16:
-; CHECK: asr z0.h, p0/m, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -66,8 +73,9 @@ define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_wide_i32:
-; CHECK: asr z0.s, p0/m, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -80,8 +88,9 @@ define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32
 
 define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: asrd_i8:
-; CHECK: asrd z0.b, p0/m, z0.b, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 i32 1)
@@ -90,8 +99,9 @@ define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: asrd_i16:
-; CHECK: asrd z0.h, p0/m, z0.h, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 i32 2)
@@ -100,8 +110,9 @@ define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a
 
 define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: asrd_i32:
-; CHECK: asrd z0.s, p0/m, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 i32 31)
@@ -110,8 +121,9 @@ define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a
 
 define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: asrd_i64:
-; CHECK: asrd z0.d, p0/m, z0.d, #64
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 i32 64)
@@ -124,64 +136,76 @@ define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a
 
 define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
 ; CHECK-LABEL: insr_i8:
-; CHECK: insr z0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.b, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
 ; CHECK-LABEL: insr_i16:
-; CHECK: insr z0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.h, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: insr_i32:
-; CHECK: insr z0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.s, w0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
 ; CHECK-LABEL: insr_i64:
-; CHECK: insr z0.d, x0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.d, x0
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
   ret <vscale x 2 x i64> %out
 }
 
 define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
 ; CHECK-LABEL: insr_f16:
-; CHECK: insr z0.h, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
+; CHECK-NEXT:    insr z0.h, h1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
   ret <vscale x 8 x half> %out
 }
 
 define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
 ; CHECK-LABEL: insr_bf16:
-; CHECK: insr z0.h, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
+; CHECK-NEXT:    insr z0.h, h1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: insr_f32:
-; CHECK: insr z0.s, s1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
+; CHECK-NEXT:    insr z0.s, s1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
   ret <vscale x 4 x float> %out
 }
 
 define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
 ; CHECK-LABEL: insr_f64:
-; CHECK: insr z0.d, d1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    insr z0.d, d1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
   ret <vscale x 2 x double> %out
 }
@@ -192,8 +216,9 @@ define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
 
 define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: lsl_i8:
-; CHECK: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
@@ -202,8 +227,9 @@ define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a,
 
 define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: lsl_i16:
-; CHECK: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
@@ -212,8 +238,9 @@ define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: lsl_i32:
-; CHECK: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
@@ -222,8 +249,9 @@ define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_i64:
-; CHECK: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
@@ -232,8 +260,9 @@ define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i8:
-; CHECK: lsl z0.b, p0/m, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -242,8 +271,9 @@ define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i16:
-; CHECK: lsl z0.h, p0/m, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -252,8 +282,9 @@ define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_wide_i32:
-; CHECK: lsl z0.s, p0/m, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -266,8 +297,9 @@ define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32
 
 define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: lsr_i8:
-; CHECK: lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
@@ -276,8 +308,9 @@ define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a,
 
 define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: lsr_i16:
-; CHECK: lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
@@ -286,8 +319,9 @@ define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a,
 
 define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: lsr_i32:
-; CHECK: lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
@@ -296,8 +330,9 @@ define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a,
 
 define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i64:
-; CHECK: lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
@@ -306,8 +341,9 @@ define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a,
 
 define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i8:
-; CHECK: lsr z0.b, p0/m, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -316,8 +352,9 @@ define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 
 define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i16:
-; CHECK: lsr z0.h, p0/m, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
                                                                     <vscale x 2 x i64> %b)
@@ -326,8 +363,9 @@ define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_wide_i32:
-; CHECK: lsr z0.s, p0/m, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
index f89cad6579172..baecf6e1bf837 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and
@@ -14,8 +15,9 @@
 
 define <vscale x 8 x i16> @sqdech(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqdech:
-; CHECK: sqdech z0.h, pow2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdech z0.h, pow2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdech.nxv8i16(<vscale x 8 x i16> %a,
                                                                   i32 0, i32 1)
   ret <vscale x 8 x i16> %out
@@ -27,8 +29,9 @@ define <vscale x 8 x i16> @sqdech(<vscale x 8 x i16> %a) {
 
 define <vscale x 4 x i32> @sqdecw(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqdecw:
-; CHECK: sqdecw z0.s, vl1, mul #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecw z0.s, vl1, mul #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecw.nxv4i32(<vscale x 4 x i32> %a,
                                                                   i32 1, i32 2)
   ret <vscale x 4 x i32> %out
@@ -40,8 +43,9 @@ define <vscale x 4 x i32> @sqdecw(<vscale x 4 x i32> %a) {
 
 define <vscale x 2 x i64> @sqdecd(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqdecd:
-; CHECK: sqdecd z0.d, vl2, mul #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecd z0.d, vl2, mul #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecd.nxv2i64(<vscale x 2 x i64> %a,
                                                                   i32 2, i32 3)
   ret <vscale x 2 x i64> %out
@@ -53,8 +57,9 @@ define <vscale x 2 x i64> @sqdecd(<vscale x 2 x i64> %a) {
 
 define <vscale x 8 x i16> @sqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_b16:
-; CHECK: sqdecp z0.h, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp z0.h, p0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdecp.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i1> %b)
   ret <vscale x 8 x i16> %out
@@ -62,8 +67,9 @@ define <vscale x 8 x i16> @sqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x i32> @sqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_b32:
-; CHECK: sqdecp z0.s, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp z0.s, p0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecp.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %b)
   ret <vscale x 4 x i32> %out
@@ -71,8 +77,9 @@ define <vscale x 4 x i32> @sqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_b64:
-; CHECK: sqdecp z0.d, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp z0.d, p0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecp.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i1> %b)
   ret <vscale x 2 x i64> %out
@@ -84,16 +91,21 @@ define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 
 define i32 @sqdecb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i32:
-; CHECK: sqdecb x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
 }
 
 define i64 @sqdecb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i64:
-; CHECK: sqdecb x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -102,8 +114,9 @@ define i64 @sqdecb_n32_i64(i32 %a) {
 
 define i64 @sqdecb_n64(i64 %a) {
 ; CHECK-LABEL: sqdecb_n64:
-; CHECK: sqdecb x0, vl4, mul #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecb x0, vl4, mul #5
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecb.n64(i64 %a, i32 4, i32 5)
   ret i64 %out
 }
@@ -114,16 +127,21 @@ define i64 @sqdecb_n64(i64 %a) {
 
 define i32 @sqdech_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i32:
-; CHECK: sqdech x0, w0, vl5, mul #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdech x0, w0, vl5, mul #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
 }
 
 define i64 @sqdech_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i64:
-; CHECK: sqdech x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdech x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -132,8 +150,9 @@ define i64 @sqdech_n32_i64(i32 %a) {
 
 define i64 @sqdech_n64(i64 %a) {
 ; CHECK-LABEL: sqdech_n64:
-; CHECK: sqdech x0, vl6, mul #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdech x0, vl6, mul #7
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdech.n64(i64 %a, i32 6, i32 7)
   ret i64 %out
 }
@@ -144,16 +163,21 @@ define i64 @sqdech_n64(i64 %a) {
 
 define i32 @sqdecw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i32:
-; CHECK: sqdecw x0, w0, vl7, mul #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecw x0, w0, vl7, mul #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
 }
 
 define i64 @sqdecw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i64:
-; CHECK: sqdecw x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecw x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -162,8 +186,9 @@ define i64 @sqdecw_n32_i64(i32 %a) {
 
 define i64 @sqdecw_n64(i64 %a) {
 ; CHECK-LABEL: sqdecw_n64:
-; CHECK: sqdecw x0, vl8, mul #9
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecw x0, vl8, mul #9
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecw.n64(i64 %a, i32 8, i32 9)
   ret i64 %out
 }
@@ -174,16 +199,21 @@ define i64 @sqdecw_n64(i64 %a) {
 
 define i32 @sqdecd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i32:
-; CHECK: sqdecd x0, w0, vl16, mul #10
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecd x0, w0, vl16, mul #10
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
 }
 
 define i64 @sqdecd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i64:
-; CHECK: sqdecd x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecd x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -192,8 +222,9 @@ define i64 @sqdecd_n32_i64(i32 %a) {
 
 define i64 @sqdecd_n64(i64 %a) {
 ; CHECK-LABEL: sqdecd_n64:
-; CHECK: sqdecd x0, vl32, mul #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecd x0, vl32, mul #11
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecd.n64(i64 %a, i32 10, i32 11)
   ret i64 %out
 }
@@ -204,16 +235,21 @@ define i64 @sqdecd_n64(i64 %a) {
 
 define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i32:
-; CHECK: sqdecp x0, p0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.b, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i64:
-; CHECK: sqdecp x0, p0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.b, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -222,16 +258,21 @@ define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 
 define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i32:
-; CHECK: sqdecp x0, p0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.h, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i64:
-; CHECK: sqdecp x0, p0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.h, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -240,16 +281,21 @@ define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 
 define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i32:
-; CHECK: sqdecp x0, p0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.s, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i64:
-; CHECK: sqdecp x0, p0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.s, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -258,16 +304,21 @@ define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 
 define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i32:
-; CHECK: sqdecp x0, p0.d, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.d, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqdecp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i64:
-; CHECK: sqdecp x0, p0.d, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqdecp x0, p0.d, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -276,32 +327,36 @@ define i64 @sqdecp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 
 define i64 @sqdecp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n64_b8:
-; CHECK: sqdecp x0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp x0, p0.b
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqdecp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n64_b16:
-; CHECK: sqdecp x0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp x0, p0.h
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqdecp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n64_b32:
-; CHECK: sqdecp x0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp x0, p0.s
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqdecp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n64_b64:
-; CHECK: sqdecp x0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdecp x0, p0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqdecp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
   ret i64 %out
 }

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
index 4631eb7fe700e..dcee9962c784a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and
@@ -14,8 +15,9 @@
 
 define <vscale x 8 x i16> @sqinch(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqinch:
-; CHECK: sqinch z0.h, pow2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqinch z0.h, pow2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqinch.nxv8i16(<vscale x 8 x i16> %a,
                                                                   i32 0, i32 1)
   ret <vscale x 8 x i16> %out
@@ -27,8 +29,9 @@ define <vscale x 8 x i16> @sqinch(<vscale x 8 x i16> %a) {
 
 define <vscale x 4 x i32> @sqincw(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqincw:
-; CHECK: sqincw z0.s, vl1, mul #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincw z0.s, vl1, mul #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqincw.nxv4i32(<vscale x 4 x i32> %a,
                                                                   i32 1, i32 2)
   ret <vscale x 4 x i32> %out
@@ -40,8 +43,9 @@ define <vscale x 4 x i32> @sqincw(<vscale x 4 x i32> %a) {
 
 define <vscale x 2 x i64> @sqincd(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqincd:
-; CHECK: sqincd z0.d, vl2, mul #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincd z0.d, vl2, mul #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqincd.nxv2i64(<vscale x 2 x i64> %a,
                                                                   i32 2, i32 3)
   ret <vscale x 2 x i64> %out
@@ -53,8 +57,9 @@ define <vscale x 2 x i64> @sqincd(<vscale x 2 x i64> %a) {
 
 define <vscale x 8 x i16> @sqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_b16:
-; CHECK: sqincp z0.h, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp z0.h, p0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqincp.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i1> %b)
   ret <vscale x 8 x i16> %out
@@ -62,8 +67,9 @@ define <vscale x 8 x i16> @sqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x i32> @sqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_b32:
-; CHECK: sqincp z0.s, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp z0.s, p0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqincp.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %b)
   ret <vscale x 4 x i32> %out
@@ -71,8 +77,9 @@ define <vscale x 4 x i32> @sqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_b64:
-; CHECK: sqincp z0.d, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp z0.d, p0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqincp.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i1> %b)
   ret <vscale x 2 x i64> %out
@@ -84,16 +91,21 @@ define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 
 define i32 @sqincb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i32:
-; CHECK: sqincb x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
 }
 
 define i64 @sqincb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i64:
-; CHECK: sqincb x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -102,8 +114,9 @@ define i64 @sqincb_n32_i64(i32 %a) {
 
 define i64 @sqincb_n64(i64 %a) {
 ; CHECK-LABEL: sqincb_n64:
-; CHECK: sqincb x0, vl4, mul #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincb x0, vl4, mul #5
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincb.n64(i64 %a, i32 4, i32 5)
   ret i64 %out
 }
@@ -114,16 +127,21 @@ define i64 @sqincb_n64(i64 %a) {
 
 define i32 @sqinch_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i32:
-; CHECK: sqinch x0, w0, vl5, mul #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqinch x0, w0, vl5, mul #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
 }
 
 define i64 @sqinch_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i64:
-; CHECK: sqinch x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqinch x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -132,8 +150,9 @@ define i64 @sqinch_n32_i64(i32 %a) {
 
 define i64 @sqinch_n64(i64 %a) {
 ; CHECK-LABEL: sqinch_n64:
-; CHECK: sqinch x0, vl6, mul #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqinch x0, vl6, mul #7
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqinch.n64(i64 %a, i32 6, i32 7)
   ret i64 %out
 }
@@ -144,16 +163,21 @@ define i64 @sqinch_n64(i64 %a) {
 
 define i32 @sqincw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i32:
-; CHECK: sqincw x0, w0, vl7, mul #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincw x0, w0, vl7, mul #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
 }
 
 define i64 @sqincw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i64:
-; CHECK: sqincw x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincw x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -162,8 +186,9 @@ define i64 @sqincw_n32_i64(i32 %a) {
 
 define i64 @sqincw_n64(i64 %a) {
 ; CHECK-LABEL: sqincw_n64:
-; CHECK: sqincw x0, vl8, mul #9
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincw x0, vl8, mul #9
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincw.n64(i64 %a, i32 8, i32 9)
   ret i64 %out
 }
@@ -174,16 +199,21 @@ define i64 @sqincw_n64(i64 %a) {
 
 define i32 @sqincd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i32:
-; CHECK: sqincd x0, w0, vl16, mul #10
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincd x0, w0, vl16, mul #10
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
 }
 
 define i64 @sqincd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i64:
-; CHECK: sqincd x0, w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincd x0, w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 3, i32 4)
   %out_sext = sext i32 %out to i64
 
@@ -192,8 +222,9 @@ define i64 @sqincd_n32_i64(i32 %a) {
 
 define i64 @sqincd_n64(i64 %a) {
 ; CHECK-LABEL: sqincd_n64:
-; CHECK: sqincd x0, vl32, mul #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincd x0, vl32, mul #11
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincd.n64(i64 %a, i32 10, i32 11)
   ret i64 %out
 }
@@ -204,16 +235,21 @@ define i64 @sqincd_n64(i64 %a) {
 
 define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i32:
-; CHECK: sqincp x0, p0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.b, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i64:
-; CHECK: sqincp x0, p0.b, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.b, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -222,16 +258,21 @@ define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 
 define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i32:
-; CHECK: sqincp x0, p0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.h, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i64:
-; CHECK: sqincp x0, p0.h, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.h, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -240,16 +281,21 @@ define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 
 define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i32:
-; CHECK: sqincp x0, p0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.s, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i64:
-; CHECK: sqincp x0, p0.s, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.s, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -258,16 +304,21 @@ define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 
 define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i32:
-; CHECK: sqincp x0, p0.d, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.d, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
 }
 
 define i64 @sqincp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i64:
-; CHECK: sqincp x0, p0.d, w0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sqincp x0, p0.d, w0
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   %out_sext = sext i32 %out to i64
 
@@ -276,32 +327,36 @@ define i64 @sqincp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 
 define i64 @sqincp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n64_b8:
-; CHECK: sqincp x0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp x0, p0.b
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqincp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n64_b16:
-; CHECK: sqincp x0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp x0, p0.h
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqincp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n64_b32:
-; CHECK: sqincp x0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp x0, p0.s
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
   ret i64 %out
 }
 
 define i64 @sqincp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n64_b64:
-; CHECK: sqincp x0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqincp x0, p0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.sqincp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
   ret i64 %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll
index 91486dcc57b98..838b9ec21cdb3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define void @st1b_upper_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_upper_bound:
-; CHECK: st1b { z0.b }, p0, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -18,8 +20,9 @@ define void @st1b_upper_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg,
 
 define void @st1b_inbound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_inbound:
-; CHECK: st1b { z0.b }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -29,8 +32,9 @@ define void @st1b_inbound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8*
 
 define void @st1b_lower_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_lower_bound:
-; CHECK: st1b { z0.b }, p0, [x0, #-8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -8
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -40,9 +44,10 @@ define void @st1b_lower_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg,
 
 define void @st1b_out_of_upper_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_out_of_upper_bound:
-; CHECK: rdvl x[[OFFSET:[0-9]+]], #8
-; CHECK: st1b { z0.b }, p0, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #8
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 8
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -52,9 +57,10 @@ define void @st1b_out_of_upper_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1
 
 define void @st1b_out_of_lower_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_out_of_lower_bound:
-; CHECK: rdvl x[[OFFSET:[0-9]+]], #-9
-; CHECK: st1b { z0.b }, p0, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #-9
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -9
   %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
@@ -64,8 +70,9 @@ define void @st1b_out_of_lower_bound(<vscale x 16 x i8> %data, <vscale x 16 x i1
 
 define void @st1b_s_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_s_inbound:
-; CHECK: st1b { z0.s }, p0, [x0, #7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
   %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
   %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
@@ -76,8 +83,9 @@ define void @st1b_s_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8*
 
 define void @st1b_h_inbound(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_h_inbound:
-; CHECK: st1b { z0.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
   %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
@@ -88,8 +96,9 @@ define void @st1b_h_inbound(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i8*
 
 define void @st1b_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %a) {
 ; CHECK-LABEL: st1b_d_inbound:
-; CHECK: st1b { z0.d }, p0, [x0, #-7, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, #-7, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
   %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 -7
   %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
@@ -104,8 +113,9 @@ define void @st1b_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8*
 
 define void @st1h_inbound(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: st1h_inbound:
-; CHECK: st1h { z0.h }, p0, [x0, #-1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #-1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 8 x i16>*
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base_scalable, i64 -1
   %base_scalar = bitcast <vscale x 8 x i16>* %base to i16*
@@ -115,8 +125,9 @@ define void @st1h_inbound(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16*
 
 define void @st1h_f16_inbound(<vscale x 8 x half> %data, <vscale x 8 x i1> %pg, half* %a) {
 ; CHECK-LABEL: st1h_f16_inbound:
-; CHECK: st1h { z0.h }, p0, [x0, #-5, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #-5, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast half* %a to <vscale x 8 x half>*
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base_scalable, i64 -5
   %base_scalar = bitcast <vscale x 8 x half>* %base to half*
@@ -126,8 +137,9 @@ define void @st1h_f16_inbound(<vscale x 8 x half> %data, <vscale x 8 x i1> %pg,
 
 define void @st1h_bf16_inbound(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: st1h_bf16_inbound:
-; CHECK: st1h { z0.h }, p0, [x0, #-5, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #-5, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
   %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 -5
   %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
@@ -137,8 +149,9 @@ define void @st1h_bf16_inbound(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %p
 
 define void @st1h_s_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: st1h_s_inbound:
-; CHECK: st1h { z0.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
   %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 2
   %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
@@ -149,8 +162,9 @@ define void @st1h_s_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16
 
 define void @st1h_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %a) {
 ; CHECK-LABEL: st1h_d_inbound:
-; CHECK: st1h { z0.d }, p0, [x0, #-4, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, #-4, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
   %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 -4
   %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
@@ -165,8 +179,9 @@ define void @st1h_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16
 
 define void @st1w_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: st1w_inbound:
-; CHECK: st1w { z0.s }, p0, [x0, #6, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i32* %a to <vscale x 4 x i32>*
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base_scalable, i64 6
   %base_scalar = bitcast <vscale x 4 x i32>* %base to i32*
@@ -176,8 +191,9 @@ define void @st1w_inbound(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32*
 
 define void @st1w_f32_inbound(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %a) {
 ; CHECK-LABEL: st1w_f32_inbound:
-; CHECK: st1w { z0.s }, p0, [x0, #-1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, #-1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast float* %a to <vscale x 4 x float>*
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base_scalable, i64 -1
   %base_scalar = bitcast <vscale x 4 x float>* %base to float*
@@ -187,8 +203,9 @@ define void @st1w_f32_inbound(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg,
 
 define void @st1w_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %a) {
 ; CHECK-LABEL: st1w_d_inbound:
-; CHECK: st1w { z0.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
   %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 1
   %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
@@ -203,8 +220,9 @@ define void @st1w_d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32
 
 define void @st1d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %a) {
 ; CHECK-LABEL: st1d_inbound:
-; CHECK: st1d { z0.d }, p0, [x0, #5, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, #5, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast i64* %a to <vscale x 2 x i64>*
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base_scalable, i64 5
   %base_scalar = bitcast <vscale x 2 x i64>* %base to i64*
@@ -214,8 +232,9 @@ define void @st1d_inbound(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64*
 
 define void @st1d_f64_inbound(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %a) {
 ; CHECK-LABEL: st1d_f64_inbound:
-; CHECK: st1d { z0.d }, p0, [x0, #-8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, #-8, mul vl]
+; CHECK-NEXT:    ret
   %base_scalable = bitcast double* %a to <vscale x 2 x double>*
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base_scalable, i64 -8
   %base_scalar = bitcast <vscale x 2 x double>* %base to double*

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll
index 7593b81e57ef7..a29315dc2f544 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: st1b_i8:
-; CHECK: st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
                                           <vscale x 16 x i1> %pred,
@@ -20,8 +22,9 @@ define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %a,
 
 define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: st1b_h:
-; CHECK: st1b { z0.h }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.h }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
   call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
@@ -32,8 +35,9 @@ define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %a, i
 
 define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: st1b_s:
-; CHECK: st1b { z0.s }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
@@ -44,8 +48,9 @@ define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %a, i
 
 define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %a, i64 %index) {
 ; CHECK-LABEL: st1b_d:
-; CHECK: st1b { z0.d }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %base = getelementptr i8, i8* %a, i64 %index
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
@@ -60,8 +65,9 @@ define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %a, i
 
 define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: st1h_i16:
-; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
                                           <vscale x 8 x i1> %pred,
@@ -71,8 +77,9 @@ define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %a
 
 define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %a, i64 %index) {
 ; CHECK-LABEL: st1h_f16:
-; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr half, half* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
                                           <vscale x 8 x i1> %pred,
@@ -82,8 +89,9 @@ define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half*
 
 define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %a, i64 %index) #0 {
 ; CHECK-LABEL: st1h_bf16:
-; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr bfloat, bfloat* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                            <vscale x 8 x i1> %pred,
@@ -93,8 +101,9 @@ define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfl
 
 define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st1h_s:
-; CHECK: st1h { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                          <vscale x 4 x i1> %pred,
@@ -104,8 +113,9 @@ define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %add
 
 define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %a, i64 %index) {
 ; CHECK-LABEL: st1h_d:
-; CHECK: st1h { z0.d }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %base = getelementptr i16, i16* %a, i64 %index
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
@@ -120,8 +130,9 @@ define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %a,
 
 define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %a, i64 %index) {
 ; CHECK-LABEL: st1w_i32:
-; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
                                           <vscale x 4 x i1> %pred,
@@ -131,8 +142,9 @@ define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %a
 
 define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %a, i64 %index) {
 ; CHECK-LABEL: st1w_f32:
-; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr float, float* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
                                           <vscale x 4 x i1> %pred,
@@ -142,8 +154,9 @@ define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float
 
 define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %a, i64 %index) {
 ; CHECK-LABEL: st1w_d:
-; CHECK: st1w { z0.d }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %base = getelementptr i32, i32* %a, i64 %index
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
@@ -158,8 +171,9 @@ define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %a,
 
 define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %a, i64 %index) {
 ; CHECK-LABEL: st1d_i64:
-; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr i64, i64* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
                                           <vscale x 2 x i1> %pred,
@@ -169,8 +183,9 @@ define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %a
 
 define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %a, i64 %index) {
 ; CHECK-LABEL: st1d_f64:
-; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %base = getelementptr double, double* %a, i64 %index
   call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
                                           <vscale x 2 x i1> %pred,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll
index 5c4db6ec7e131..8842337149d97 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll
@@ -1,6 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 
 define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st1b_i8:
-; CHECK: st1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
                                           <vscale x 16 x i1> %pred,
                                           i8* %addr)
@@ -19,8 +21,9 @@ define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %ad
 
 define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st1b_h:
-; CHECK: st1b { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
   call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
                                          <vscale x 8 x i1> %pred,
@@ -30,8 +33,9 @@ define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %addr
 
 define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st1b_s:
-; CHECK: st1b { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
                                          <vscale x 4 x i1> %pred,
@@ -41,8 +45,9 @@ define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %addr
 
 define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st1b_d:
-; CHECK: st1b { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
                                          <vscale x 2 x i1> %pred,
@@ -56,8 +61,9 @@ define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %addr
 
 define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st1h_i16:
-; CHECK: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
                                           <vscale x 8 x i1> %pred,
                                           i16* %addr)
@@ -66,8 +72,9 @@ define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %a
 
 define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: st1h_f16:
-; CHECK: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
                                           <vscale x 8 x i1> %pred,
                                           half* %addr)
@@ -76,8 +83,9 @@ define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half*
 
 define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: st1h_bf16:
-; CHECK: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                            <vscale x 8 x i1> %pred,
                                            bfloat* %addr)
@@ -86,8 +94,9 @@ define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfl
 
 define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st1h_s:
-; CHECK: st1h { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                          <vscale x 4 x i1> %pred,
@@ -97,8 +106,9 @@ define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %add
 
 define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st1h_d:
-; CHECK: st1h { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
                                          <vscale x 2 x i1> %pred,
@@ -112,8 +122,9 @@ define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %add
 
 define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: st1w_i32:
-; CHECK: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
                                           <vscale x 4 x i1> %pred,
                                           i32* %addr)
@@ -122,8 +133,9 @@ define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %a
 
 define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: st1w_f32:
-; CHECK: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
                                           <vscale x 4 x i1> %pred,
                                           float* %addr)
@@ -132,8 +144,9 @@ define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float
 
 define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: st1w_d:
-; CHECK: st1w { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
                                          <vscale x 2 x i1> %pred,
@@ -147,8 +160,9 @@ define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %add
 
 define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: st1d_i64:
-; CHECK: st1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
                                           <vscale x 2 x i1> %pred,
                                           i64* %addr)
@@ -157,8 +171,9 @@ define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %a
 
 define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: st1d_f64:
-; CHECK: st1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
                                           <vscale x 2 x i1> %pred,
                                           double* %addr)

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index fe435300d9e3d..e6ca643182b58 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ; NOTE: invalid, upper and lower bound immediate values of the reg+imm
 ; addressing mode are checked only for the byte version of each
@@ -13,8 +14,11 @@
 
 define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm:
-; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -25,9 +29,12 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 
 define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
-; CHECK: rdvl x[[N:[0-9]+]], #3
-; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    rdvl x8, #3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -38,9 +45,12 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
 
 define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
-; CHECK: rdvl x[[N:[0-9]+]], #-18
-; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    rdvl x8, #-18
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -51,9 +61,12 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
-; CHECK: rdvl x[[N:[0-9]+]], #16
-; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    rdvl x8, #16
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -64,8 +77,11 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
-; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -76,8 +92,11 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
-; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14, i64 0
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -92,8 +111,11 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
 ; CHECK-LABEL: st2h_i16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2, i64 0
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -104,8 +126,11 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
 ; CHECK-LABEL: st2h_f16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2, i64 0
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -120,8 +145,11 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
 ; CHECK-LABEL: st2w_i32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4, i64 0
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -132,8 +160,11 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
 ; CHECK-LABEL: st2w_f32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6, i64 0
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -148,8 +179,11 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
 ; CHECK-LABEL: st2d_i64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8, i64 0
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -160,8 +194,11 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
 ; CHECK-LABEL: st2d_f64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10, i64 0
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -176,8 +213,12 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm:
-; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -189,9 +230,13 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 
 define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
-; CHECK: rdvl x[[N:[0-9]+]], #4
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    rdvl x8, #4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -203,9 +248,13 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
 
 define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
-; CHECK: rdvl x[[N:[0-9]+]], #5
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -217,9 +266,13 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
 
 define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
-; CHECK: rdvl x[[N:[0-9]+]], #-27
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    rdvl x8, #-27
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -231,9 +284,13 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
-; CHECK: rdvl x[[N:[0-9]+]], #24
-; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    rdvl x8, #24
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -245,8 +302,12 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
-; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -258,8 +319,12 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
-; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -275,8 +340,12 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
 ; CHECK-LABEL: st3h_i16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -288,8 +357,12 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
 ; CHECK-LABEL: st3h_f16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -305,8 +378,12 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
 ; CHECK-LABEL: st3w_i32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -318,8 +395,12 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
 ; CHECK-LABEL: st3w_f32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -335,8 +416,12 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
 ; CHECK-LABEL: st3d_i64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -348,8 +433,12 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
 ; CHECK-LABEL: st3d_f64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -365,8 +454,13 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm:
-; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -379,9 +473,14 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 
 define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
-; CHECK: rdvl x[[N:[0-9]+]], #5
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -394,9 +493,14 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
 
 define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
-; CHECK: rdvl x[[N:[0-9]+]], #6
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    rdvl x8, #6
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -409,9 +513,14 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
 
 define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
-; CHECK: rdvl x[[N:[0-9]+]], #7
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    rdvl x8, #7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -424,16 +533,21 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <v
 
 define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x9, #-576
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #-9)
 ; xM = -9 * 2^6
 ; xP = RDVL * 2^-4
 ; xBASE = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36
-; CHECK: rdvl x[[N:[0-9]+]], #1
-; CHECK-DAG:  mov  x[[M:[0-9]+]], #-576
-; CHECK-DAG:  lsr  x[[P:[0-9]+]], x[[N]], #4
-; CHECK-DAG:  mul  x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -446,16 +560,21 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov w9, #512
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8]
+; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
 ; xM = 2^9
 ; xP = RDVL * 2^-4
 ; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32
-; CHECK: rdvl x[[N:[0-9]+]], #1
-; CHECK-DAG:  mov  w[[M:[0-9]+]], #512
-; CHECK-DAG:  lsr  x[[P:[0-9]+]], x[[N]], #4
-; CHECK-DAG:  mul  x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
-; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
-; CHECK-NEXT: ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -468,8 +587,13 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 
 define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
-; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -482,8 +606,13 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
-; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -500,8 +629,13 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
 ; CHECK-LABEL: st4h_i16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -514,8 +648,13 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
 ; CHECK-LABEL: st4h_f16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -532,8 +671,13 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
 ; CHECK-LABEL: st4w_i32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -546,8 +690,13 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
 ; CHECK-LABEL: st4w_f32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -564,8 +713,13 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
 ; CHECK-LABEL: st4d_i64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -578,8 +732,13 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
 ; CHECK-LABEL: st4d_f64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
+; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index e034f0d47b704..464cf97c57036 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ;
 ; ST2B
@@ -7,8 +8,11 @@
 
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
 ; CHECK-LABEL: st2b_i8:
-; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i8, i8* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -23,8 +27,11 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_i16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i16, i16* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -35,8 +42,11 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_f16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr half, half* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -51,8 +61,11 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_i32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr i32, i32* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -63,8 +76,11 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_f32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr float, float* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -79,8 +95,11 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_i64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr i64, i64* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -91,8 +110,11 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_f64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr double, double* %addr, i64 %offset
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -107,8 +129,12 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
 ; CHECK-LABEL: st3b_i8:
-; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i8, i8* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -124,8 +150,12 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_i16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i16, i16* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -137,8 +167,12 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_f16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr half, half* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -154,8 +188,12 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_i32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr i32, i32* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -167,8 +205,12 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_f32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr float, float* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -184,8 +226,12 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_i64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr i64, i64* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -197,8 +243,12 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_f64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr double, double* %addr, i64 %offset
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
@@ -214,8 +264,13 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
 ; CHECK-LABEL: st4b_i8:
-; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i8, i8* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
@@ -232,8 +287,13 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_i16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr i16, i16* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
@@ -246,8 +306,13 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_f16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
   %1 = getelementptr half, half* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
@@ -264,8 +329,13 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_i32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr i32, i32* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
@@ -278,8 +348,13 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_f32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
   %1 = getelementptr float, float* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
@@ -296,8 +371,13 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_i64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr i64, i64* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
@@ -310,8 +390,13 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_f64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
   %1 = getelementptr double, double* %addr, i64 %offset
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index ecd12f53c430a..49c1153135bfc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,11 @@
 
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st2b_i8:
-; CHECK: st2b { z0.b, z1.b }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
                                           <vscale x 16 x i1> %pred,
@@ -22,8 +26,11 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st2h_i16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
                                           <vscale x 8 x i1> %pred,
@@ -33,8 +40,11 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: st2h_f16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
                                           <vscale x 8 x i1> %pred,
@@ -44,8 +54,11 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: st2h_bf16:
-; CHECK: st2h { z0.h, z1.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x i1> %pred,
@@ -59,8 +72,11 @@ define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: st2w_i32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
                                           <vscale x 4 x i1> %pred,
@@ -70,8 +86,11 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: st2w_f32:
-; CHECK: st2w { z0.s, z1.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
                                           <vscale x 4 x i1> %pred,
@@ -85,8 +104,11 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: st2d_i64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
                                           <vscale x 2 x i1> %pred,
@@ -96,8 +118,11 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: st2d_f64:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
                                           <vscale x 2 x i1> %pred,
@@ -107,8 +132,11 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i1> %pred, i8** %addr) {
 ; CHECK-LABEL: st2d_ptr:
-; CHECK: st2d { z0.d, z1.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %v0,
                                            <vscale x 2 x i8*> %v1,
                                            <vscale x 2 x i1> %pred,
@@ -122,8 +150,12 @@ define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
 
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st3b_i8:
-; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3b { z0.b, z1.b, z2.b }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
                                           <vscale x 16 x i8> %v2,
@@ -138,8 +170,12 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st3h_i16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
                                           <vscale x 8 x i16> %v2,
@@ -150,8 +186,12 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: st3h_f16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
                                           <vscale x 8 x half> %v2,
@@ -162,8 +202,12 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: st3h_bf16:
-; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3h { z0.h, z1.h, z2.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x bfloat> %v2,
@@ -178,8 +222,12 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: st3w_i32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
                                           <vscale x 4 x i32> %v2,
@@ -190,8 +238,12 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: st3w_f32:
-; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3w { z0.s, z1.s, z2.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
                                           <vscale x 4 x float> %v2,
@@ -206,8 +258,12 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: st3d_i64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
                                           <vscale x 2 x i64> %v2,
@@ -218,8 +274,12 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: st3d_f64:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
                                           <vscale x 2 x double> %v2,
@@ -230,8 +290,12 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i1> %pred, i8** %addr) {
 ; CHECK-LABEL: st3d_ptr:
-; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
                                            <vscale x 2 x i8*> %v1,
                                            <vscale x 2 x i8*> %v2,
@@ -246,8 +310,13 @@ define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
 
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: st4b_i8:
-; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                           <vscale x 16 x i8> %v1,
                                           <vscale x 16 x i8> %v2,
@@ -263,8 +332,13 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: st4h_i16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                           <vscale x 8 x i16> %v1,
                                           <vscale x 8 x i16> %v2,
@@ -276,8 +350,13 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: st4h_f16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                           <vscale x 8 x half> %v1,
                                           <vscale x 8 x half> %v2,
@@ -289,8 +368,13 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 
 define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: st4h_bf16:
-; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x bfloat> %v2,
@@ -306,8 +390,13 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: st4w_i32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                           <vscale x 4 x i32> %v1,
                                           <vscale x 4 x i32> %v2,
@@ -319,8 +408,13 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: st4w_f32:
-; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
                                           <vscale x 4 x float> %v2,
@@ -336,8 +430,13 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: st4d_i64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                           <vscale x 2 x i64> %v1,
                                           <vscale x 2 x i64> %v2,
@@ -349,8 +448,13 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: st4d_f64:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                           <vscale x 2 x double> %v1,
                                           <vscale x 2 x double> %v2,
@@ -362,8 +466,13 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 
 define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i8*> %v3, <vscale x 2 x i1> %pred, i8** %addr) {
 ; CHECK-LABEL: st4d_ptr:
-; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
                                            <vscale x 2 x i8*> %v1,
                                            <vscale x 2 x i8*> %v2,
@@ -378,8 +487,9 @@ define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x
 
 define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: stnt1b_i8:
-; CHECK: stnt1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1b { z0.b }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                             <vscale x 16 x i1> %pred,
                                             i8* %addr)
@@ -392,8 +502,9 @@ define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %
 
 define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: stnt1h_i16:
-; CHECK: stnt1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                             <vscale x 8 x i1> %pred,
                                             i16* %addr)
@@ -402,8 +513,9 @@ define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16*
 
 define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: stnt1h_f16:
-; CHECK: stnt1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                             <vscale x 8 x i1> %pred,
                                             half* %addr)
@@ -412,8 +524,9 @@ define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half
 
 define void @stnt1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: stnt1h_bf16:
-; CHECK: stnt1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                              <vscale x 8 x i1> %pred,
                                              bfloat* %addr)
@@ -426,8 +539,9 @@ define void @stnt1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, b
 
 define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: stnt1w_i32:
-; CHECK: stnt1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                             <vscale x 4 x i1> %pred,
                                             i32* %addr)
@@ -436,8 +550,9 @@ define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32*
 
 define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: stnt1w_f32:
-; CHECK: stnt1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                             <vscale x 4 x i1> %pred,
                                             float* %addr)
@@ -450,8 +565,9 @@ define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, flo
 
 define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: stnt1d_i64:
-; CHECK: stnt1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                             <vscale x 2 x i1> %pred,
                                             i64* %addr)
@@ -460,8 +576,9 @@ define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64*
 
 define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: stnt1d_f64:
-; CHECK: stnt1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                             <vscale x 2 x i1> %pred,
                                             double* %addr)
@@ -472,61 +589,79 @@ define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, do
 ; Stores (tuples)
 
 define void @store_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
-; CHECK-LABEL: store_i64_tuple3
-; CHECK:      st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-LABEL: store_i64_tuple3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
   store <vscale x 6 x i64> %tuple, <vscale x 6 x i64>* %out
   ret void
 }
 
 define void @store_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
-; CHECK-LABEL: store_i64_tuple4
-; CHECK:      st1d { z3.d }, p0, [x0, #3, mul vl]
-; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-LABEL: store_i64_tuple4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
   store <vscale x 8 x i64> %tuple, <vscale x 8 x i64>* %out
   ret void
 }
 
 define void @store_i16_tuple2(<vscale x 16 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) {
-; CHECK-LABEL: store_i16_tuple2
-; CHECK:      st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-LABEL: store_i16_tuple2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1h { z1.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2)
   store <vscale x 16 x i16> %tuple, <vscale x 16 x i16>* %out
   ret void
 }
 
 define void @store_i16_tuple3(<vscale x 24 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) {
-; CHECK-LABEL: store_i16_tuple3
-; CHECK:      st1h { z2.h }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-LABEL: store_i16_tuple3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1h { z2.h }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1h { z1.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3)
   store <vscale x 24 x i16> %tuple, <vscale x 24 x i16>* %out
   ret void
 }
 
 define void @store_f32_tuple3(<vscale x 12 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) {
-; CHECK-LABEL: store_f32_tuple3
-; CHECK:      st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-LABEL: store_f32_tuple3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z2.s }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3)
   store <vscale x 12 x float> %tuple, <vscale x 12 x float>* %out
   ret void
 }
 
 define void @store_f32_tuple4(<vscale x 16 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) {
-; CHECK-LABEL: store_f32_tuple4
-; CHECK:      st1w { z3.s }, p0, [x0, #3, mul vl]
-; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-LABEL: store_f32_tuple4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z3.s }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
   %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4)
   store <vscale x 16 x float> %tuple, <vscale x 16 x float>* %out
   ret void

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
index d6e48579663c8..6d240b92cdda3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; ADD
 ;
 
-define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: add_i8:
-; CHECK: add z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -17,10 +17,11 @@ define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: add_i16:
-; CHECK: add z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -28,10 +29,11 @@ define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: add_i32:
-; CHECK: add z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -39,10 +41,11 @@ define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: add_i64:
-; CHECK: add z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -54,10 +57,11 @@ define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; SUB
 ;
 
-define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sub_i8:
-; CHECK: sub z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -65,10 +69,11 @@ define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sub_i16:
-; CHECK: sub z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -76,10 +81,11 @@ define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sub_i32:
-; CHECK: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -87,10 +93,11 @@ define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sub_i64:
-; CHECK: sub z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -99,10 +106,11 @@ define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 }
 
 ; As sub_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sub_i32_ptrue_all_b:
-; CHECK: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
@@ -112,10 +120,11 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x
 }
 
 ; As sub_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sub_i32_ptrue_all_h:
-; CHECK: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -127,11 +136,12 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x
 
 ; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sub_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: sub z0.s, [[PG]]/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -145,10 +155,11 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x
 ; MUL
 ;
 
-define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: mul_i8:
-; CHECK: mul z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -156,10 +167,11 @@ define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: mul_i16:
-; CHECK: mul z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -167,10 +179,11 @@ define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: mul_i32:
-; CHECK: mul z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -178,10 +191,11 @@ define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: mul_i64:
-; CHECK: mul z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -193,10 +207,11 @@ define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; SMULH
 ;
 
-define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smulh_i8:
-; CHECK: smulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -204,10 +219,11 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smulh_i16:
-; CHECK: smulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -215,10 +231,11 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smulh_i32:
-; CHECK: smulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -226,10 +243,11 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smulh_i64:
-; CHECK: smulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -241,10 +259,11 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; UMULH
 ;
 
-define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umulh_i8:
-; CHECK: umulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -252,10 +271,11 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umulh_i16:
-; CHECK: umulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -263,10 +283,11 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umulh_i32:
-; CHECK: umulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -274,10 +295,11 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umulh_i64:
-; CHECK: umulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -286,10 +308,11 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 }
 
 ; As umulh_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umulh_i32_ptrue_all_b:
-; CHECK: umulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s,
@@ -299,10 +322,11 @@ define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale
 }
 
 ; As umulh_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umulh_i32_ptrue_all_h:
-; CHECK: umulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -314,11 +338,12 @@ define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale
 
 ; As umulh_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umulh_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: umulh z0.s, [[PG]]/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -332,10 +357,11 @@ define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale
 ; AND
 ;
 
-define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: and_i8:
-; CHECK: and z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -343,10 +369,11 @@ define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: and_i16:
-; CHECK: and z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -354,10 +381,11 @@ define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: and_i32:
-; CHECK: and z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -365,10 +393,11 @@ define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: and_i64:
-; CHECK: and z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -380,10 +409,11 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; BIC
 ;
 
-define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bic_i8:
-; CHECK: bic z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -391,10 +421,11 @@ define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bic_i16:
-; CHECK: bic z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -402,10 +433,11 @@ define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bic_i32:
-; CHECK: bic z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -413,10 +445,11 @@ define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bic_i64:
-; CHECK: bic z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -428,10 +461,11 @@ define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; EOR
 ;
 
-define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: eor_i8:
-; CHECK: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -439,10 +473,11 @@ define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: eor_i16:
-; CHECK: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -450,10 +485,11 @@ define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: eor_i32:
-; CHECK: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -461,10 +497,11 @@ define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: eor_i64:
-; CHECK: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -476,10 +513,11 @@ define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; ORR
 ;
 
-define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: orr_i8:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
@@ -487,10 +525,11 @@ define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: orr_i16:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
@@ -498,10 +537,11 @@ define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: orr_i32:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
@@ -509,10 +549,11 @@ define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: orr_i64:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
@@ -521,10 +562,11 @@ define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 }
 
 ; As orr_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: orr_i32_ptrue_all_b:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
@@ -534,10 +576,11 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x
 }
 
 ; As orr_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: orr_i32_ptrue_all_h:
-; CHECK: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -549,11 +592,12 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x
 
 ; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: orr_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: orr z0.s, [[PG]]/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -567,10 +611,11 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x
 ; SQADD
 ;
 
-define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqadd_i8:
-; CHECK: sqadd z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -578,10 +623,11 @@ define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqadd_i16:
-; CHECK: sqadd z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -589,10 +635,11 @@ define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqadd_i32:
-; CHECK: sqadd z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -600,10 +647,11 @@ define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqadd_i64:
-; CHECK: sqadd z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -615,10 +663,11 @@ define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; SQSUB
 ;
 
-define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqsub_i8:
-; CHECK: sqsub z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -626,10 +675,11 @@ define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqsub_i16:
-; CHECK: sqsub z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -637,10 +687,11 @@ define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqsub_i32:
-; CHECK: sqsub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -648,10 +699,11 @@ define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqsub_i64:
-; CHECK: sqsub z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -663,10 +715,11 @@ define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; UQADD
 ;
 
-define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqadd_i8:
-; CHECK: uqadd z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -674,10 +727,11 @@ define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqadd_i16:
-; CHECK: uqadd z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -685,10 +739,11 @@ define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqadd_i32:
-; CHECK: uqadd z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -696,10 +751,11 @@ define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqadd_i64:
-; CHECK: uqadd z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -711,10 +767,11 @@ define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; UQSUB
 ;
 
-define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqsub_i8:
-; CHECK: uqsub z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -722,10 +779,11 @@ define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqsub_i16:
-; CHECK: uqsub z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -733,10 +791,11 @@ define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsub_i32:
-; CHECK: uqsub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -744,10 +803,11 @@ define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqsub_i64:
-; CHECK: uqsub z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -756,10 +816,11 @@ define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 }
 
 ; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsub_i32_ptrue_all_b:
-; CHECK: uqsub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
@@ -769,10 +830,11 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale
 }
 
 ; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsub_i32_ptrue_all_h:
-; CHECK: uqsub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -784,11 +846,12 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale
 
 ; As uqsub_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsub_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: uqsub z0.s, [[PG]]/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -802,10 +865,11 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale
 ; ASR (wide)
 ;
 
-define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_i8:
-; CHECK: asr z0.b, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.b, z0.b, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
@@ -813,10 +877,11 @@ define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_i16:
-; CHECK: asr z0.h, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.h, z0.h, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
@@ -824,10 +889,11 @@ define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: asr_i32:
-; CHECK: asr z0.s, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.s, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
@@ -839,10 +905,11 @@ define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
 ; LSL (wide)
 ;
 
-define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_i8:
-; CHECK: lsl z0.b, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, z0.b, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
@@ -850,10 +917,11 @@ define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_i16:
-; CHECK: lsl z0.h, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, z0.h, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
@@ -861,10 +929,11 @@ define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsl_i32:
-; CHECK: lsl z0.s, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
@@ -876,10 +945,11 @@ define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
 ; LSR (wide)
 ;
 
-define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i8:
-; CHECK: lsr z0.b, z0.b, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.b, z0.b, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                     <vscale x 16 x i8> %a,
@@ -887,10 +957,11 @@ define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i16:
-; CHECK: lsr z0.h, z0.h, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.h, z0.h, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x i16> %a,
@@ -898,10 +969,11 @@ define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i32:
-; CHECK: lsr z0.s, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x i32> %a,
@@ -910,10 +982,11 @@ define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
 }
 
 ; As lsr_i32 but where pg is i8 based and thus compatible for i32.
-define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i32_ptrue_all_b:
-; CHECK: lsr z0.s, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s,
@@ -923,10 +996,11 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x
 }
 
 ; As lsr_i32 but where pg is i16 based and thus compatible for i32.
-define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i32_ptrue_all_h:
-; CHECK: lsr z0.s, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -938,11 +1012,12 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x
 
 ; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
 ; thus inactive lanes are important and the immediate form cannot be used.
-define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: lsr_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: lsr z0.s, [[PG]]/m, z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT:    ret
   %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -956,10 +1031,11 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x
 ; FADD
 ;
 
-define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fadd_half:
-; CHECK: fadd z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
@@ -967,10 +1043,11 @@ define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half
   ret <vscale x 8 x half> %out
 }
 
-define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fadd_float:
-; CHECK: fadd z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
@@ -978,10 +1055,11 @@ define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x f
   ret <vscale x 4 x float> %out
 }
 
-define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fadd_double:
-; CHECK: fadd z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
@@ -993,10 +1071,11 @@ define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2
 ; FSUB
 ;
 
-define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fsub_half:
-; CHECK: fsub z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
@@ -1004,10 +1083,11 @@ define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half
   ret <vscale x 8 x half> %out
 }
 
-define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fsub_float:
-; CHECK: fsub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
@@ -1015,10 +1095,11 @@ define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x f
   ret <vscale x 4 x float> %out
 }
 
-define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fsub_double:
-; CHECK: fsub z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
@@ -1030,10 +1111,11 @@ define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2
 ; FMUL
 ;
 
-define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmul_half:
-; CHECK: fmul z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %a,
@@ -1041,10 +1123,11 @@ define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half
   ret <vscale x 8 x half> %out
 }
 
-define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmul_float:
-; CHECK: fmul z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a,
@@ -1052,10 +1135,11 @@ define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x f
   ret <vscale x 4 x float> %out
 }
 
-define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmul_double:
-; CHECK: fmul z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a,
@@ -1164,5 +1248,3 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-
-attributes #0 = { "target-features"="+sve2" }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll
index d7e6ac42291aa..4c3ab8573ab1f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.uqinc{b|h|w|d|p}, and
@@ -14,8 +15,9 @@
 
 define <vscale x 8 x i16> @uqdech(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqdech:
-; CHECK: uqdech z0.h, pow2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdech z0.h, pow2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16> %a,
                                                                   i32 0, i32 1)
   ret <vscale x 8 x i16> %out
@@ -27,8 +29,9 @@ define <vscale x 8 x i16> @uqdech(<vscale x 8 x i16> %a) {
 
 define <vscale x 4 x i32> @uqdecw(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqdecw:
-; CHECK: uqdecw z0.s, vl1, mul #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecw z0.s, vl1, mul #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32> %a,
                                                                   i32 1, i32 2)
   ret <vscale x 4 x i32> %out
@@ -40,8 +43,9 @@ define <vscale x 4 x i32> @uqdecw(<vscale x 4 x i32> %a) {
 
 define <vscale x 2 x i64> @uqdecd(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqdecd:
-; CHECK: uqdecd z0.d, vl2, mul #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecd z0.d, vl2, mul #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64> %a,
                                                                   i32 2, i32 3)
   ret <vscale x 2 x i64> %out
@@ -53,8 +57,9 @@ define <vscale x 2 x i64> @uqdecd(<vscale x 2 x i64> %a) {
 
 define <vscale x 8 x i16> @uqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqdecp_b16:
-; CHECK: uqdecp z0.h, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp z0.h, p0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i1> %b)
   ret <vscale x 8 x i16> %out
@@ -62,8 +67,9 @@ define <vscale x 8 x i16> @uqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x i32> @uqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqdecp_b32:
-; CHECK: uqdecp z0.s, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp z0.s, p0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %b)
   ret <vscale x 4 x i32> %out
@@ -71,8 +77,9 @@ define <vscale x 4 x i32> @uqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @uqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqdecp_b64:
-; CHECK: uqdecp z0.d, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp z0.d, p0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i1> %b)
   ret <vscale x 2 x i64> %out
@@ -84,16 +91,18 @@ define <vscale x 2 x i64> @uqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 
 define i32 @uqdecb_n32(i32 %a) {
 ; CHECK-LABEL: uqdecb_n32:
-; CHECK: uqdecb w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecb w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
 }
 
 define i64 @uqdecb_n64(i64 %a) {
 ; CHECK-LABEL: uqdecb_n64:
-; CHECK: uqdecb x0, vl4, mul #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecb x0, vl4, mul #5
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecb.n64(i64 %a, i32 4, i32 5)
   ret i64 %out
 }
@@ -104,16 +113,18 @@ define i64 @uqdecb_n64(i64 %a) {
 
 define i32 @uqdech_n32(i32 %a) {
 ; CHECK-LABEL: uqdech_n32:
-; CHECK: uqdech w0, vl5, mul #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdech w0, vl5, mul #6
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdech.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
 }
 
 define i64 @uqdech_n64(i64 %a) {
 ; CHECK-LABEL: uqdech_n64:
-; CHECK: uqdech x0, vl6, mul #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdech x0, vl6, mul #7
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdech.n64(i64 %a, i32 6, i32 7)
   ret i64 %out
 }
@@ -124,16 +135,18 @@ define i64 @uqdech_n64(i64 %a) {
 
 define i32 @uqdecw_n32(i32 %a) {
 ; CHECK-LABEL: uqdecw_n32:
-; CHECK: uqdecw w0, vl7, mul #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecw w0, vl7, mul #8
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
 }
 
 define i64 @uqdecw_n64(i64 %a) {
 ; CHECK-LABEL: uqdecw_n64:
-; CHECK: uqdecw x0, vl8, mul #9
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecw x0, vl8, mul #9
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecw.n64(i64 %a, i32 8, i32 9)
   ret i64 %out
 }
@@ -144,16 +157,18 @@ define i64 @uqdecw_n64(i64 %a) {
 
 define i32 @uqdecd_n32(i32 %a) {
 ; CHECK-LABEL: uqdecd_n32:
-; CHECK: uqdecd w0, vl16, mul #10
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecd w0, vl16, mul #10
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
 }
 
 define i64 @uqdecd_n64(i64 %a) {
 ; CHECK-LABEL: uqdecd_n64:
-; CHECK: uqdecd x0, vl32, mul #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecd x0, vl32, mul #11
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecd.n64(i64 %a, i32 10, i32 11)
   ret i64 %out
 }
@@ -164,64 +179,72 @@ define i64 @uqdecd_n64(i64 %a) {
 
 define i32 @uqdecp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n32_b8:
-; CHECK: uqdecp w0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp w0, p0.b
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqdecp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n32_b16:
-; CHECK: uqdecp w0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp w0, p0.h
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqdecp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n32_b32:
-; CHECK: uqdecp w0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp w0, p0.s
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqdecp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n32_b64:
-; CHECK: uqdecp w0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp w0, p0.d
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
 }
 
 define i64 @uqdecp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n64_b8:
-; CHECK: uqdecp x0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp x0, p0.b
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqdecp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n64_b16:
-; CHECK: uqdecp x0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp x0, p0.h
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqdecp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n64_b32:
-; CHECK: uqdecp x0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp x0, p0.s
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqdecp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqdecp_n64_b64:
-; CHECK: uqdecp x0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqdecp x0, p0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
   ret i64 %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll
index 96324d90d7ef3..9db0a0cf56055 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.uqinc{b|h|w|d|p}, and
@@ -14,8 +15,9 @@
 
 define <vscale x 8 x i16> @uqinch(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqinch:
-; CHECK: uqinch z0.h, pow2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqinch z0.h, pow2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqinch.nxv8i16(<vscale x 8 x i16> %a,
                                                                   i32 0, i32 1)
   ret <vscale x 8 x i16> %out
@@ -27,8 +29,9 @@ define <vscale x 8 x i16> @uqinch(<vscale x 8 x i16> %a) {
 
 define <vscale x 4 x i32> @uqincw(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqincw:
-; CHECK: uqincw z0.s, vl1, mul #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincw z0.s, vl1, mul #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqincw.nxv4i32(<vscale x 4 x i32> %a,
                                                                   i32 1, i32 2)
   ret <vscale x 4 x i32> %out
@@ -40,8 +43,9 @@ define <vscale x 4 x i32> @uqincw(<vscale x 4 x i32> %a) {
 
 define <vscale x 2 x i64> @uqincd(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqincd:
-; CHECK: uqincd z0.d, vl2, mul #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincd z0.d, vl2, mul #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqincd.nxv2i64(<vscale x 2 x i64> %a,
                                                                   i32 2, i32 3)
   ret <vscale x 2 x i64> %out
@@ -53,8 +57,9 @@ define <vscale x 2 x i64> @uqincd(<vscale x 2 x i64> %a) {
 
 define <vscale x 8 x i16> @uqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqincp_b16:
-; CHECK: uqincp z0.h, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp z0.h, p0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqincp.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i1> %b)
   ret <vscale x 8 x i16> %out
@@ -62,8 +67,9 @@ define <vscale x 8 x i16> @uqincp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %
 
 define <vscale x 4 x i32> @uqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqincp_b32:
-; CHECK: uqincp z0.s, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp z0.s, p0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqincp.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %b)
   ret <vscale x 4 x i32> %out
@@ -71,8 +77,9 @@ define <vscale x 4 x i32> @uqincp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
 
 define <vscale x 2 x i64> @uqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqincp_b64:
-; CHECK: uqincp z0.d, p0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp z0.d, p0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqincp.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i1> %b)
   ret <vscale x 2 x i64> %out
@@ -84,16 +91,18 @@ define <vscale x 2 x i64> @uqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 
 define i32 @uqincb_n32(i32 %a) {
 ; CHECK-LABEL: uqincb_n32:
-; CHECK: uqincb w0, vl3, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincb w0, vl3, mul #4
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
 }
 
 define i64 @uqincb_n64(i64 %a) {
 ; CHECK-LABEL: uqincb_n64:
-; CHECK: uqincb x0, vl4, mul #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincb x0, vl4, mul #5
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincb.n64(i64 %a, i32 4, i32 5)
   ret i64 %out
 }
@@ -104,16 +113,18 @@ define i64 @uqincb_n64(i64 %a) {
 
 define i32 @uqinch_n32(i32 %a) {
 ; CHECK-LABEL: uqinch_n32:
-; CHECK: uqinch w0, vl5, mul #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqinch w0, vl5, mul #6
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqinch.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
 }
 
 define i64 @uqinch_n64(i64 %a) {
 ; CHECK-LABEL: uqinch_n64:
-; CHECK: uqinch x0, vl6, mul #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqinch x0, vl6, mul #7
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqinch.n64(i64 %a, i32 6, i32 7)
   ret i64 %out
 }
@@ -124,16 +135,18 @@ define i64 @uqinch_n64(i64 %a) {
 
 define i32 @uqincw_n32(i32 %a) {
 ; CHECK-LABEL: uqincw_n32:
-; CHECK: uqincw w0, vl7, mul #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincw w0, vl7, mul #8
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
 }
 
 define i64 @uqincw_n64(i64 %a) {
 ; CHECK-LABEL: uqincw_n64:
-; CHECK: uqincw x0, vl8, mul #9
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincw x0, vl8, mul #9
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincw.n64(i64 %a, i32 8, i32 9)
   ret i64 %out
 }
@@ -144,16 +157,18 @@ define i64 @uqincw_n64(i64 %a) {
 
 define i32 @uqincd_n32(i32 %a) {
 ; CHECK-LABEL: uqincd_n32:
-; CHECK: uqincd w0, vl16, mul #10
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincd w0, vl16, mul #10
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
 }
 
 define i64 @uqincd_n64(i64 %a) {
 ; CHECK-LABEL: uqincd_n64:
-; CHECK: uqincd x0, vl32, mul #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincd x0, vl32, mul #11
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincd.n64(i64 %a, i32 10, i32 11)
   ret i64 %out
 }
@@ -164,64 +179,72 @@ define i64 @uqincd_n64(i64 %a) {
 
 define i32 @uqincp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uqincp_n32_b8:
-; CHECK: uqincp w0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp w0, p0.b
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqincp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqincp_n32_b16:
-; CHECK: uqincp w0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp w0, p0.h
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqincp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqincp_n32_b32:
-; CHECK: uqincp w0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp w0, p0.s
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
 }
 
 define i32 @uqincp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqincp_n32_b64:
-; CHECK: uqincp w0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp w0, p0.d
+; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.uqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
 }
 
 define i64 @uqincp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: uqincp_n64_b8:
-; CHECK: uqincp x0, p0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp x0, p0.b
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqincp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: uqincp_n64_b16:
-; CHECK: uqincp x0, p0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp x0, p0.h
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqincp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: uqincp_n64_b32:
-; CHECK: uqincp x0, p0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp x0, p0.s
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
   ret i64 %out
 }
 
 define i64 @uqincp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: uqincp_n64_b64:
-; CHECK: uqincp x0, p0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqincp x0, p0.d
+; CHECK-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.uqincp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
   ret i64 %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
index 4fefc05cc322d..669e87d940270 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,64 +8,72 @@
 
 define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_b_ww:
-; CHECK: whilele p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_b_xx:
-; CHECK: whilele p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_h_ww:
-; CHECK: whilele p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_h_xx:
-; CHECK: whilele p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_s_ww:
-; CHECK: whilele p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_s_xx:
-; CHECK: whilele p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_d_ww:
-; CHECK: whilele p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_d_xx:
-; CHECK: whilele p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -75,64 +84,72 @@ define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_b_ww:
-; CHECK: whilelo p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilelo_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_b_xx:
-; CHECK: whilelo p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilelo_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_h_ww:
-; CHECK: whilelo p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilelo_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_h_xx:
-; CHECK: whilelo p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilelo_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_s_ww:
-; CHECK: whilelo p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilelo_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_s_xx:
-; CHECK: whilelo p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilelo_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_d_ww:
-; CHECK: whilelo p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_d_xx:
-; CHECK: whilelo p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -143,64 +160,72 @@ define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_b_ww:
-; CHECK: whilels p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_b_xx:
-; CHECK: whilels p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_h_ww:
-; CHECK: whilels p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_h_xx:
-; CHECK: whilels p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_s_ww:
-; CHECK: whilels p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_s_xx:
-; CHECK: whilels p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_d_ww:
-; CHECK: whilels p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_d_xx:
-; CHECK: whilels p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -211,64 +236,72 @@ define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_b_ww:
-; CHECK: whilelt p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_b_xx:
-; CHECK: whilelt p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilelt_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_h_ww:
-; CHECK: whilelt p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilelt_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_h_xx:
-; CHECK: whilelt p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilelt_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_s_ww:
-; CHECK: whilelt p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilelt_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_s_xx:
-; CHECK: whilelt p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilelt_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_d_ww:
-; CHECK: whilelt p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilelt_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_d_xx:
-; CHECK: whilelt p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
index 18024ebbb30f5..d74a59be91bae 100644
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -6,9 +6,9 @@ define <vscale x 16 x i8> @sel_8_positive(<vscale x 16 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, p0/z, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
-%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
-ret <vscale x 16 x i8> %sel
+  %vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
+  ret <vscale x 16 x i8> %sel
 }
 
 define <vscale x 8 x i16> @sel_16_positive(<vscale x 8 x i1> %p) {
@@ -16,9 +16,9 @@ define <vscale x 8 x i16> @sel_16_positive(<vscale x 8 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/z, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_32_positive(<vscale x 4 x i1> %p) {
@@ -26,9 +26,9 @@ define <vscale x 4 x i32> @sel_32_positive(<vscale x 4 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/z, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_64_positive(<vscale x 2 x i1> %p) {
@@ -36,9 +36,9 @@ define <vscale x 2 x i64> @sel_64_positive(<vscale x 2 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 16 x i8> @sel_8_negative(<vscale x 16 x i1> %p) {
@@ -46,9 +46,9 @@ define <vscale x 16 x i8> @sel_8_negative(<vscale x 16 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, p0/z, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
-%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
-ret <vscale x 16 x i8> %sel
+  %vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer
+  ret <vscale x 16 x i8> %sel
 }
 
 define <vscale x 8 x i16> @sel_16_negative(<vscale x 8 x i1> %p) {
@@ -56,9 +56,9 @@ define <vscale x 8 x i16> @sel_16_negative(<vscale x 8 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/z, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_32_negative(<vscale x 4 x i1> %p) {
@@ -66,9 +66,9 @@ define <vscale x 4 x i32> @sel_32_negative(<vscale x 4 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/z, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_64_negative(<vscale x 2 x i1> %p) {
@@ -76,9 +76,9 @@ define <vscale x 2 x i64> @sel_64_negative(<vscale x 2 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 8 x i16> @sel_16_shifted(<vscale x 8 x i1> %p) {
@@ -86,9 +86,9 @@ define <vscale x 8 x i16> @sel_16_shifted(<vscale x 8 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/z, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_32_shifted(<vscale x 4 x i1> %p) {
@@ -96,9 +96,9 @@ define <vscale x 4 x i32> @sel_32_shifted(<vscale x 4 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/z, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_64_shifted(<vscale x 2 x i1> %p) {
@@ -106,9 +106,9 @@ define <vscale x 2 x i64> @sel_64_shifted(<vscale x 2 x i1> %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %sel
 }
 
 ; TODO: We could actually use something like "cpy z0.b, p0/z, #-128". But it's
@@ -123,9 +123,9 @@ define <vscale x 8 x i16> @sel_16_illegal_wrong_extension(<vscale x 8 x i1> %p)
 ; CHECK-NEXT:    mov z0.h, w8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_32_illegal_wrong_extension(<vscale x 4 x i1> %p) {
@@ -136,9 +136,9 @@ define <vscale x 4 x i32> @sel_32_illegal_wrong_extension(<vscale x 4 x i1> %p)
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
@@ -148,9 +148,9 @@ define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p)
 ; CHECK-NEXT:    mov z1.d, #128 // =0x80
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 8 x i16> @sel_16_illegal_shifted(<vscale x 8 x i1> %p) {
@@ -161,9 +161,9 @@ define <vscale x 8 x i16> @sel_16_illegal_shifted(<vscale x 8 x i1> %p) {
 ; CHECK-NEXT:    mov z0.h, w8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> zeroinitializer
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_32_illegal_shifted(<vscale x 4 x i1> %p) {
@@ -174,9 +174,9 @@ define <vscale x 4 x i32> @sel_32_illegal_shifted(<vscale x 4 x i1> %p) {
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_64_illegal_shifted(<vscale x 2 x i1> %p) {
@@ -187,9 +187,9 @@ define <vscale x 2 x i64> @sel_64_illegal_shifted(<vscale x 2 x i1> %p) {
 ; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 16 x i8> @sel_merge_8_positive(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
@@ -197,9 +197,9 @@ define <vscale x 16 x i8> @sel_merge_8_positive(<vscale x 16 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, p0/m, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
-%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
-ret <vscale x 16 x i8> %sel
+  %vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
+  ret <vscale x 16 x i8> %sel
 }
 
 define <vscale x 8 x i16> @sel_merge_16_positive(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
@@ -207,9 +207,9 @@ define <vscale x 8 x i16> @sel_merge_16_positive(<vscale x 8 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/m, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_positive(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -217,9 +217,9 @@ define <vscale x 4 x i32> @sel_merge_32_positive(<vscale x 4 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/m, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_positive(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -227,9 +227,9 @@ define <vscale x 2 x i64> @sel_merge_64_positive(<vscale x 2 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/m, #3 // =0x3
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 16 x i8> @sel_merge_8_negative(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
@@ -237,9 +237,9 @@ define <vscale x 16 x i8> @sel_merge_8_negative(<vscale x 16 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, p0/m, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
-%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
-ret <vscale x 16 x i8> %sel
+  %vec = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 -128, i32 0), <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %vec, <vscale x 16 x i8> %in
+  ret <vscale x 16 x i8> %sel
 }
 
 define <vscale x 8 x i16> @sel_merge_16_negative(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
@@ -247,9 +247,9 @@ define <vscale x 8 x i16> @sel_merge_16_negative(<vscale x 8 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/m, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 -128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_negative(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -257,9 +257,9 @@ define <vscale x 4 x i32> @sel_merge_32_negative(<vscale x 4 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/m, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 -128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_negative(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -267,9 +267,9 @@ define <vscale x 2 x i64> @sel_merge_64_negative(<vscale x 2 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/m, #-128 // =0xffffffffffffff80
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 -128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 16 x i8> @sel_merge_8_zero(<vscale x 16 x i1> %p, <vscale x 16 x i8> %in) {
@@ -277,8 +277,8 @@ define <vscale x 16 x i8> @sel_merge_8_zero(<vscale x 16 x i1> %p, <vscale x 16
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
-%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %in
-ret <vscale x 16 x i8> %sel
+  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %in
+  ret <vscale x 16 x i8> %sel
 }
 
 define <vscale x 8 x i16> @sel_merge_16_zero(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
@@ -286,8 +286,8 @@ define <vscale x 8 x i16> @sel_merge_16_zero(<vscale x 8 x i1> %p, <vscale x 8 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_zero(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -295,8 +295,8 @@ define <vscale x 4 x i32> @sel_merge_32_zero(<vscale x 4 x i1> %p, <vscale x 4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_zero(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -304,8 +304,8 @@ define <vscale x 2 x i64> @sel_merge_64_zero(<vscale x 2 x i1> %p, <vscale x 2 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 8 x i16> @sel_merge_16_shifted(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
@@ -313,9 +313,9 @@ define <vscale x 8 x i16> @sel_merge_16_shifted(<vscale x 8 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, p0/m, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 512, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_shifted(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -323,9 +323,9 @@ define <vscale x 4 x i32> @sel_merge_32_shifted(<vscale x 4 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, p0/m, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 512, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_shifted(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -333,9 +333,9 @@ define <vscale x 2 x i64> @sel_merge_64_shifted(<vscale x 2 x i1> %p, <vscale x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/m, #512 // =0x200
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 512, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }
 
 ; TODO: We could actually use something like "cpy z0.b, p0/m, #-128". But it's
@@ -349,9 +349,9 @@ define <vscale x 8 x i16> @sel_merge_16_illegal_wrong_extension(<vscale x 8 x i1
 ; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 128, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_illegal_wrong_extension(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -361,9 +361,9 @@ define <vscale x 4 x i32> @sel_merge_32_illegal_wrong_extension(<vscale x 4 x i1
 ; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 128, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -372,9 +372,9 @@ define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1
 ; CHECK-NEXT:    mov z1.d, #128 // =0x80
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }
 
 define <vscale x 8 x i16> @sel_merge_16_illegal_shifted(<vscale x 8 x i1> %p, <vscale x 8 x i16> %in) {
@@ -384,9 +384,9 @@ define <vscale x 8 x i16> @sel_merge_16_illegal_shifted(<vscale x 8 x i1> %p, <v
 ; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
-%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
-ret <vscale x 8 x i16> %sel
+  %vec = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 513, i32 0), <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i32> zeroinitializer
+  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %vec, <vscale x 8 x i16> %in
+  ret <vscale x 8 x i16> %sel
 }
 
 define <vscale x 4 x i32> @sel_merge_32_illegal_shifted(<vscale x 4 x i1> %p, <vscale x 4 x i32> %in) {
@@ -396,9 +396,9 @@ define <vscale x 4 x i32> @sel_merge_32_illegal_shifted(<vscale x 4 x i1> %p, <v
 ; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
-ret <vscale x 4 x i32> %sel
+  %vec = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 513, i32 0), <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %vec, <vscale x 4 x i32> %in
+  ret <vscale x 4 x i32> %sel
 }
 
 define <vscale x 2 x i64> @sel_merge_64_illegal_shifted(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
@@ -408,7 +408,7 @@ define <vscale x 2 x i64> @sel_merge_64_illegal_shifted(<vscale x 2 x i1> %p, <v
 ; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
-%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
-%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
-ret <vscale x 2 x i64> %sel
+  %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 513, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> %in
+  ret <vscale x 2 x i64> %sel
 }

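For reference, fully expanded CHECK bodies like the ones above are produced by pointing utils/update_llc_test_checks.py at a test file and letting the script execute the file's own RUN lines. A minimal sketch of the invocation, with the build directory name and the --llc-binary path being assumptions about the local setup rather than anything mandated by this commit:

  $ llvm/utils/update_llc_test_checks.py \
        --llc-binary=build/bin/llc \
        llvm/test/CodeGen/AArch64/sve-intrinsics-*.ll
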
diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll
index 053c6cb3ebf2c..7b7bbe0a9acb3 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -5,8 +6,9 @@
 
 define <vscale x 16 x i8> @addhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: addhnb_h:
-; CHECK: addhnb z0.b, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnb z0.b, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
@@ -14,8 +16,9 @@ define <vscale x 16 x i8> @addhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 8 x i16> @addhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: addhnb_s:
-; CHECK: addhnb z0.h, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnb z0.h, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
@@ -23,8 +26,9 @@ define <vscale x 8 x i16> @addhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @addhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: addhnb_d:
-; CHECK: addhnb z0.s, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnb z0.s, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
@@ -34,8 +38,9 @@ define <vscale x 4 x i32> @addhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @addhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: addhnt_h:
-; CHECK: addhnt z0.b, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnt z0.b, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -44,8 +49,9 @@ define <vscale x 16 x i8> @addhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b
 
 define <vscale x 8 x i16> @addhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: addhnt_s:
-; CHECK: addhnt z0.h, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnt z0.h, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -54,8 +60,9 @@ define <vscale x 8 x i16> @addhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @addhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: addhnt_d:
-; CHECK: addhnt z0.s, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addhnt z0.s, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   <vscale x 2 x i64> %c)
@@ -66,8 +73,9 @@ define <vscale x 4 x i32> @addhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @raddhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: raddhnb_h:
-; CHECK: raddhnb z0.b, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnb z0.b, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.raddhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
@@ -75,8 +83,9 @@ define <vscale x 16 x i8> @raddhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @raddhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: raddhnb_s:
-; CHECK: raddhnb z0.h, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnb z0.h, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.raddhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
@@ -84,8 +93,9 @@ define <vscale x 8 x i16> @raddhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @raddhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: raddhnb_d:
-; CHECK: raddhnb z0.s, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnb z0.s, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.raddhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
@@ -95,8 +105,9 @@ define <vscale x 4 x i32> @raddhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @raddhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: raddhnt_h:
-; CHECK: raddhnt z0.b, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnt z0.b, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.raddhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    <vscale x 8 x i16> %c)
@@ -105,8 +116,9 @@ define <vscale x 16 x i8> @raddhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @raddhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: raddhnt_s:
-; CHECK: raddhnt z0.h, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnt z0.h, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.raddhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    <vscale x 4 x i32> %c)
@@ -115,8 +127,9 @@ define <vscale x 8 x i16> @raddhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @raddhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: raddhnt_d:
-; CHECK: raddhnt z0.s, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    raddhnt z0.s, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.raddhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    <vscale x 2 x i64> %c)
@@ -127,8 +140,9 @@ define <vscale x 4 x i32> @raddhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @rsubhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: rsubhnb_h:
-; CHECK: rsubhnb z0.b, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnb z0.b, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
@@ -136,8 +150,9 @@ define <vscale x 16 x i8> @rsubhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @rsubhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: rsubhnb_s:
-; CHECK: rsubhnb z0.h, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnb z0.h, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
@@ -145,8 +160,9 @@ define <vscale x 8 x i16> @rsubhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @rsubhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: rsubhnb_d:
-; CHECK: rsubhnb z0.s, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnb z0.s, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
@@ -156,8 +172,9 @@ define <vscale x 4 x i32> @rsubhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @rsubhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: rsubhnt_h:
-; CHECK: rsubhnt z0.b, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnt z0.b, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    <vscale x 8 x i16> %c)
@@ -166,8 +183,9 @@ define <vscale x 16 x i8> @rsubhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @rsubhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: rsubhnt_s:
-; CHECK: rsubhnt z0.h, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnt z0.h, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    <vscale x 4 x i32> %c)
@@ -176,8 +194,9 @@ define <vscale x 8 x i16> @rsubhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @rsubhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: rsubhnt_d:
-; CHECK: rsubhnt z0.s, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rsubhnt z0.s, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    <vscale x 2 x i64> %c)
@@ -188,8 +207,9 @@ define <vscale x 4 x i32> @rsubhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @subhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: subhnb_h:
-; CHECK: subhnb z0.b, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnb z0.b, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
@@ -197,8 +217,9 @@ define <vscale x 16 x i8> @subhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 8 x i16> @subhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: subhnb_s:
-; CHECK: subhnb z0.h, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnb z0.h, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
@@ -206,8 +227,9 @@ define <vscale x 8 x i16> @subhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @subhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: subhnb_d:
-; CHECK: subhnb z0.s, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnb z0.s, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
@@ -217,8 +239,9 @@ define <vscale x 4 x i32> @subhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @subhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: subhnt_h:
-; CHECK: subhnt z0.b, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnt z0.b, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -227,8 +250,9 @@ define <vscale x 16 x i8> @subhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b
 
 define <vscale x 8 x i16> @subhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: subhnt_s:
-; CHECK: subhnt z0.h, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnt z0.h, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -237,8 +261,9 @@ define <vscale x 8 x i16> @subhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @subhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: subhnt_d:
-; CHECK: subhnt z0.s, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subhnt z0.s, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   <vscale x 2 x i64> %c)

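Once the assertions have been regenerated, an individual file can be re-run through lit to confirm the new CHECK-NEXT sequences still pass. A minimal sketch, again assuming a local build directory named "build":

  $ build/bin/llvm-lit -v \
        llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll
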
diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
index d79e767664cbb..76200ff790027 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: shrnb_h:
-; CHECK: shrnb z0.b, z0.h, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnb z0.b, z0.h, #8
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  i32 8)
   ret <vscale x 16 x i8> %out
@@ -16,8 +18,9 @@ define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: shrnb_s:
-; CHECK: shrnb z0.h, z0.s, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnb z0.h, z0.s, #16
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  i32 16)
   ret <vscale x 8 x i16> %out
@@ -25,8 +28,9 @@ define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: shrnb_d:
-; CHECK: shrnb z0.s, z0.d, #32
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnb z0.s, z0.d, #32
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  i32 32)
   ret <vscale x 4 x i32> %out
@@ -38,8 +42,9 @@ define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @rshrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: rshrnb_h:
-; CHECK: rshrnb z0.b, z0.h, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnb z0.b, z0.h, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rshrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   i32 2)
   ret <vscale x 16 x i8> %out
@@ -47,8 +52,9 @@ define <vscale x 16 x i8> @rshrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @rshrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: rshrnb_s:
-; CHECK: rshrnb z0.h, z0.s, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnb z0.h, z0.s, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rshrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   i32 2)
   ret <vscale x 8 x i16> %out
@@ -56,8 +62,9 @@ define <vscale x 8 x i16> @rshrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @rshrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: rshrnb_d:
-; CHECK: rshrnb z0.s, z0.d, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnb z0.s, z0.d, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rshrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   i32 2)
   ret <vscale x 4 x i32> %out
@@ -69,8 +76,9 @@ define <vscale x 4 x i32> @rshrnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqshrnb_h:
-; CHECK: uqshrnb z0.b, z0.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnb z0.b, z0.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    i32 1)
   ret <vscale x 16 x i8> %out
@@ -78,8 +86,9 @@ define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqshrnb_s:
-; CHECK: uqshrnb z0.h, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnb z0.h, z0.s, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                    i32 1)
   ret <vscale x 8 x i16> %out
@@ -87,8 +96,9 @@ define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqshrnb_d:
-; CHECK: uqshrnb z0.s, z0.d, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnb z0.s, z0.d, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                    i32 1)
   ret <vscale x 4 x i32> %out
@@ -100,8 +110,9 @@ define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqshrnb_h:
-; CHECK: sqshrnb z0.b, z0.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnb z0.b, z0.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    i32 1)
   ret <vscale x 16 x i8> %out
@@ -109,8 +120,9 @@ define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshrnb_s:
-; CHECK: sqshrnb z0.h, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnb z0.h, z0.s, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                    i32 1)
   ret <vscale x 8 x i16> %out
@@ -118,8 +130,9 @@ define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshrnb_d:
-; CHECK: sqshrnb z0.s, z0.d, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnb z0.s, z0.d, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                    i32 1)
   ret <vscale x 4 x i32> %out
@@ -130,9 +143,10 @@ define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
 ;
 
 define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: qshrunb_h:
-; CHECK: sqshrunb z0.b, z0.h, #7
-; CHECK-NEXT: ret
+; CHECK-LABEL: sqshrunb_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunb z0.b, z0.h, #7
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16> %a,
                                                                     i32 7)
   ret <vscale x 16 x i8> %out
@@ -140,8 +154,9 @@ define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshrunb_s:
-; CHECK: sqshrunb z0.h, z0.s, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunb z0.h, z0.s, #15
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32> %a,
                                                                     i32 15)
   ret <vscale x 8 x i16> %out
@@ -149,8 +164,9 @@ define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshrunb_d:
-; CHECK: sqshrunb z0.s, z0.d, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunb z0.s, z0.d, #31
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64> %a,
                                                                     i32 31)
   ret <vscale x 4 x i32> %out
@@ -162,8 +178,9 @@ define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqrshrnb_h:
-; CHECK: uqrshrnb z0.b, z0.h, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                     i32 2)
   ret <vscale x 16 x i8> %out
@@ -171,8 +188,9 @@ define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqrshrnb_s:
-; CHECK: uqrshrnb z0.h, z0.s, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                     i32 2)
   ret <vscale x 8 x i16> %out
@@ -180,8 +198,9 @@ define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqrshrnb_d:
-; CHECK: uqrshrnb z0.s, z0.d, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                     i32 2)
   ret <vscale x 4 x i32> %out
@@ -193,8 +212,9 @@ define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqrshrnb_h:
-; CHECK: sqrshrnb z0.b, z0.h, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                     i32 2)
   ret <vscale x 16 x i8> %out
@@ -202,8 +222,9 @@ define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqrshrnb_s:
-; CHECK: sqrshrnb z0.h, z0.s, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                     i32 2)
   ret <vscale x 8 x i16> %out
@@ -211,8 +232,9 @@ define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqrshrnb_d:
-; CHECK: sqrshrnb z0.s, z0.d, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                     i32 2)
   ret <vscale x 4 x i32> %out
@@ -224,8 +246,9 @@ define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqrshrunb_h:
-; CHECK: sqrshrunb z0.b, z0.h, #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunb z0.b, z0.h, #6
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16> %a,
                                                                      i32 6)
   ret <vscale x 16 x i8> %out
@@ -233,8 +256,9 @@ define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
 
 define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqrshrunb_s:
-; CHECK: sqrshrunb z0.h, z0.s, #14
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunb z0.h, z0.s, #14
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32> %a,
                                                                      i32 14)
   ret <vscale x 8 x i16> %out
@@ -242,8 +266,9 @@ define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqrshrunb_d:
-; CHECK: sqrshrunb z0.s, z0.d, #30
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunb z0.s, z0.d, #30
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64> %a,
                                                                      i32 30)
   ret <vscale x 4 x i32> %out
@@ -255,8 +280,9 @@ define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: shrnt_h:
-; CHECK: shrnt z0.b, z1.h, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnt z0.b, z1.h, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 3)
@@ -265,8 +291,9 @@ define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: shrnt_s:
-; CHECK: shrnt z0.h, z1.s, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnt z0.h, z1.s, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 3)
@@ -275,8 +302,9 @@ define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: shrnt_d:
-; CHECK: shrnt z0.s, z1.d, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shrnt z0.s, z1.d, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 3)
@@ -289,8 +317,9 @@ define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 16 x i8> @rshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: rshrnt_h:
-; CHECK: rshrnt z0.b, z1.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnt z0.b, z1.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rshrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   i32 1)
@@ -299,8 +328,9 @@ define <vscale x 16 x i8> @rshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b
 
 define <vscale x 8 x i16> @rshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: rshrnt_s:
-; CHECK: rshrnt z0.h, z1.s, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnt z0.h, z1.s, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rshrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   i32 5)
@@ -309,8 +339,9 @@ define <vscale x 8 x i16> @rshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @rshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: rshrnt_d:
-; CHECK: rshrnt z0.s, z1.d, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rshrnt z0.s, z1.d, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rshrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   i32 5)
@@ -323,8 +354,9 @@ define <vscale x 4 x i32> @rshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqshrnt_h:
-; CHECK: uqshrnt z0.b, z1.h, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnt z0.b, z1.h, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    i32 5)
@@ -333,8 +365,9 @@ define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqshrnt_s:
-; CHECK: uqshrnt z0.h, z1.s, #13
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnt z0.h, z1.s, #13
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    i32 13)
@@ -343,8 +376,9 @@ define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqshrnt_d:
-; CHECK: uqshrnt z0.s, z1.d, #29
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshrnt z0.s, z1.d, #29
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    i32 29)
@@ -357,8 +391,9 @@ define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqshrnt_h:
-; CHECK: sqshrnt z0.b, z1.h, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnt z0.b, z1.h, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    i32 5)
@@ -367,8 +402,9 @@ define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %
 
 define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqshrnt_s:
-; CHECK: sqshrnt z0.h, z1.s, #13
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnt z0.h, z1.s, #13
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    i32 13)
@@ -377,8 +413,9 @@ define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %
 
 define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqshrnt_d:
-; CHECK: sqshrnt z0.s, z1.d, #29
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrnt z0.s, z1.d, #29
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    i32 29)
@@ -391,8 +428,9 @@ define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqshrunt_h:
-; CHECK: sqshrunt z0.b, z1.h, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunt z0.b, z1.h, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     i32 4)
@@ -401,8 +439,9 @@ define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16>
 
 define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqshrunt_s:
-; CHECK: sqshrunt z0.h, z1.s, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunt z0.h, z1.s, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 4)
@@ -411,8 +450,9 @@ define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqshrunt_d:
-; CHECK: sqshrunt z0.s, z1.d, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshrunt z0.s, z1.d, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     i32 4)
@@ -425,8 +465,9 @@ define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqrshrnt_h:
-; CHECK: uqrshrnt z0.b, z1.h, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     i32 8)
@@ -435,8 +476,9 @@ define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16>
 
 define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqrshrnt_s:
-; CHECK: uqrshrnt z0.h, z1.s, #12
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 12)
@@ -445,8 +487,9 @@ define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqrshrnt_d:
-; CHECK: uqrshrnt z0.s, z1.d, #28
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     i32 28)
@@ -459,8 +502,9 @@ define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrshrnt_h:
-; CHECK: sqrshrnt z0.b, z1.h, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     i32 8)
@@ -469,8 +513,9 @@ define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16>
 
 define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrshrnt_s:
-; CHECK: sqrshrnt z0.h, z1.s, #12
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 12)
@@ -479,8 +524,9 @@ define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrshrnt_d:
-; CHECK: sqrshrnt z0.s, z1.d, #28
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     i32 28)
@@ -493,8 +539,9 @@ define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrshrunt_h:
-; CHECK: sqrshrunt z0.b, z1.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunt z0.b, z1.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8> %a,
                                                                      <vscale x 8 x i16> %b,
                                                                      i32 1)
@@ -503,8 +550,9 @@ define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16>
 
 define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrshrunt_s:
-; CHECK: sqrshrunt z0.h, z1.s, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunt z0.h, z1.s, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16> %a,
                                                                      <vscale x 4 x i32> %b,
                                                                      i32 5)
@@ -513,8 +561,9 @@ define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @sqrshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrshrunt_d:
-; CHECK: sqrshrunt z0.s, z1.d, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshrunt z0.s, z1.d, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32> %a,
                                                                      <vscale x 2 x i64> %b,
                                                                      i32 5)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-bit-permutation.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-bit-permutation.ll
index 13ca4b8967e39..5cbe88213680a 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-bit-permutation.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-bit-permutation.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2,+sve2-bitperm < %s | FileCheck %s
 
 ;
@@ -6,32 +7,36 @@
 
 define <vscale x 16 x i8> @bdep_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bdep_nxv16i8:
-; CHECK: bdep z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bdep z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bdep.x.nx16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @bdep_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bdep_nxv8i16:
-; CHECK: bdep z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bdep z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bdep.x.nx8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @bdep_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bdep_nxv4i32:
-; CHECK: bdep z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bdep z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bdep.x.nx4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @bdep_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bdep_nxv2i64:
-; CHECK: bdep z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bdep z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bdep.x.nx2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
@@ -42,32 +47,36 @@ define <vscale x 2 x i64> @bdep_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
 
 define <vscale x 16 x i8> @bext_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bext_nxv16i8:
-; CHECK: bext z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bext z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bext.x.nx16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @bext_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bext_nxv8i16:
-; CHECK: bext z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bext z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bext.x.nx8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @bext_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bext_nxv4i32:
-; CHECK: bext z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bext z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bext.x.nx4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @bext_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bext_nxv2i64:
-; CHECK: bext z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bext z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bext.x.nx2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
@@ -78,32 +87,36 @@ define <vscale x 2 x i64> @bext_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
 
 define <vscale x 16 x i8> @bgrp_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bgrp_nxv16i8:
-; CHECK: bgrp z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bgrp z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bgrp.x.nx16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @bgrp_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bgrp_nxv8i16:
-; CHECK: bgrp z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bgrp z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bgrp.x.nx8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @bgrp_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bgrp_nxv4i32:
-; CHECK: bgrp z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bgrp z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bgrp.x.nx4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @bgrp_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bgrp_nxv2i64:
-; CHECK: bgrp z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bgrp z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bgrp.x.nx2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
index 3c755a3ea59f3..2712c14def88e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; MATCH
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i1> @match_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: match_i8:
-; CHECK: match p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -16,8 +18,9 @@ define <vscale x 16 x i1> @match_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i1> @match_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: match_i16:
-; CHECK: match p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -30,8 +33,9 @@ define <vscale x 8 x i1> @match_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a
 
 define <vscale x 16 x i1> @nmatch_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: nmatch_i8:
-; CHECK: match p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nmatch p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -40,8 +44,9 @@ define <vscale x 16 x i1> @nmatch_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i1> @nmatch_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: nmatch_i16:
-; CHECK: match p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nmatch p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.nmatch.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
index 04c0cd3e9a1f6..c4c69f1805794 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll
@@ -1,15 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
-
 ;
 ; CDOT
 ;
 
 define <vscale x 4 x i32> @cdot_s(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: cdot_s:
-; CHECK: cdot z0.s, z1.b, z2.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cdot z0.s, z1.b, z2.b, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %c,
@@ -19,8 +20,9 @@ define <vscale x 4 x i32> @cdot_s(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b,
 
 define <vscale x 2 x i64> @cdot_d(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: cdot_d:
-; CHECK: cdot z0.d, z1.h, z2.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cdot z0.d, z1.h, z2.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cdot.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %c,
@@ -34,8 +36,9 @@ define <vscale x 2 x i64> @cdot_d(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b,
 
 define <vscale x 4 x i32> @cdot_s_idx(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: cdot_s_idx:
-; CHECK: cdot z0.s, z1.b, z2.b[0], #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cdot z0.s, z1.b, z2.b[0], #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                      <vscale x 16 x i8> %b,
                                                                      <vscale x 16 x i8> %c,
@@ -43,11 +46,11 @@ define <vscale x 4 x i32> @cdot_s_idx(<vscale x 4 x i32> %a, <vscale x 16 x i8>
   ret <vscale x 4 x i32> %out
 }
 
-
 define <vscale x 2 x i64> @cdot_d_idx(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: cdot_d_idx:
-; CHECK: cdot z0.d, z1.h, z2.h[1], #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cdot z0.d, z1.h, z2.h[1], #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cdot.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                      <vscale x 8 x i16> %b,
                                                                      <vscale x 8 x i16> %c,
@@ -55,7 +58,6 @@ define <vscale x 2 x i64> @cdot_d_idx(<vscale x 2 x i64> %a, <vscale x 8 x i16>
   ret <vscale x 2 x i64> %out
 }
 
-
 declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.cdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.cdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32, i32)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
index 8ca1584bc1792..364ce8752827c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ;
 ; WHILERW
@@ -7,64 +8,72 @@
 
 define <vscale x 16 x i1> @whilerw_i8(i8* %a, i8* %b) {
 ; CHECK-LABEL: whilerw_i8:
-; CHECK: whilerw  p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nx16i1(i8* %a, i8* %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilerw_i16(i16* %a, i16* %b) {
 ; CHECK-LABEL: whilerw_i16:
-; CHECK: whilerw  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1(i16* %a, i16* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilerw_i32(i32* %a, i32* %b) {
 ; CHECK-LABEL: whilerw_i32:
-; CHECK: whilerw  p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1(i32* %a, i32* %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilerw_i64(i64* %a, i64* %b) {
 ; CHECK-LABEL: whilerw_i64:
-; CHECK: whilerw  p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1(i64* %a, i64* %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilerw_bfloat(bfloat* %a, bfloat* %b) {
 ; CHECK-LABEL: whilerw_bfloat:
-; CHECK: whilerw  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilerw_half(half* %a, half* %b) {
 ; CHECK-LABEL: whilerw_half:
-; CHECK: whilerw  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.f16.f16(half* %a, half* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilerw_float(float* %a, float* %b) {
 ; CHECK-LABEL: whilerw_float:
-; CHECK: whilerw  p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1.f32.f32(float* %a, float* %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilerw_double(double* %a, double* %b) {
 ; CHECK-LABEL: whilerw_double:
-; CHECK: whilerw  p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilerw p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1.f64.f64(double* %a, double* %b)
   ret <vscale x 2 x i1> %out
 }
@@ -75,64 +84,72 @@ define <vscale x 2 x i1> @whilerw_double(double* %a, double* %b) {
 
 define <vscale x 16 x i1> @whilewr_i8(i8* %a, i8* %b) {
 ; CHECK-LABEL: whilewr_i8:
-; CHECK: whilewr  p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nx16i1(i8* %a, i8* %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilewr_i16(i16* %a, i16* %b) {
 ; CHECK-LABEL: whilewr_i16:
-; CHECK: whilewr  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1(i16* %a, i16* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilewr_i32(i32* %a, i32* %b) {
 ; CHECK-LABEL: whilewr_i32:
-; CHECK: whilewr  p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1(i32* %a, i32* %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilewr_i64(i64* %a, i64* %b) {
 ; CHECK-LABEL: whilewr_i64:
-; CHECK: whilewr  p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1(i64* %a, i64* %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilewr_bfloat(bfloat* %a, bfloat* %b) {
 ; CHECK-LABEL: whilewr_bfloat:
-; CHECK: whilewr  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilewr_half(half* %a, half* %b) {
 ; CHECK-LABEL: whilewr_half:
-; CHECK: whilewr  p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.f16.f16(half* %a, half* %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilewr_float(float* %a, float* %b) {
 ; CHECK-LABEL: whilewr_float:
-; CHECK: whilewr  p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1.f32.f32(float* %a, float* %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilewr_double(double* %a, double* %b) {
 ; CHECK-LABEL: whilewr_double:
-; CHECK: whilewr  p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilewr p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1.f64.f64(double* %a, double* %b)
   ret <vscale x 2 x i1> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
index fca5552027511..3bfaf6dddaef8 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 < %s | FileCheck %s
 
 ;
 ; AESD
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @aesd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: aesd_i8:
-; CHECK: aesd z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    aesd z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> %a,
                                                         <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -19,8 +21,9 @@ define <vscale x 16 x i8> @aesd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 16 x i8> @aesimc_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: aesimc_i8:
-; CHECK: aesimc z0.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    aesimc z0.b, z0.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8> %a)
   ret <vscale x 16 x i8> %out
 }
@@ -31,8 +34,9 @@ define <vscale x 16 x i8> @aesimc_i8(<vscale x 16 x i8> %a) {
 
 define <vscale x 16 x i8> @aese_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: aese_i8:
-; CHECK: aese z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    aese z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> %a,
                                                         <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -44,8 +48,9 @@ define <vscale x 16 x i8> @aese_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 16 x i8> @aesmc_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: aesmc_i8:
-; CHECK: aesmc z0.b, z0.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    aesmc z0.b, z0.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8> %a)
   ret <vscale x 16 x i8> %out
 }
@@ -56,8 +61,9 @@ define <vscale x 16 x i8> @aesmc_i8(<vscale x 16 x i8> %a) {
 
 define <vscale x 2 x i64> @rax1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: rax1_i64:
-; CHECK: rax1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rax1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> %a,
                                                         <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -69,8 +75,9 @@ define <vscale x 2 x i64> @rax1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 4 x i32> @sm4e_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sm4e_i32:
-; CHECK: sm4e z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sm4e z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32> %a,
                                                         <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -82,8 +89,9 @@ define <vscale x 4 x i32> @sm4e_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @sm4ekey_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sm4ekey_i32:
-; CHECK: sm4ekey z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sm4ekey z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32> %a,
                                                            <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
index 1c3f52de5afb3..6054034ff1cce 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 4 x float> @fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fcvtlt_f32_f16:
-; CHECK: fcvtlt z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a,
                                                                    <vscale x 4 x i1> %pg,
                                                                    <vscale x 8 x half> %b)
@@ -17,8 +19,9 @@ define <vscale x 4 x float> @fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 4
 
 define <vscale x 2 x double> @fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtlt_f64_f32:
-; CHECK: fcvtlt	z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtlt z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a,
                                                                     <vscale x 2 x i1> %pg,
                                                                     <vscale x 4 x float> %b)
@@ -31,8 +34,9 @@ define <vscale x 2 x double> @fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x
 
 define <vscale x 8 x half> @fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvtnt_f16_f32:
-; CHECK: fcvtnt z0.h, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a,
                                                              <vscale x 4 x i1> %pg,
                                                              <vscale x 4 x float> %b)
@@ -41,8 +45,9 @@ define <vscale x 8 x half> @fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x
 
 define <vscale x 4 x float> @fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtnt_f32_f64:
-; CHECK: fcvtnt	z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtnt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a,
                                                                    <vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %b)
@@ -55,8 +60,9 @@ define <vscale x 4 x float> @fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2
 
 define <vscale x 4 x float> @fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtx_f32_f64:
-; CHECK: fcvtx z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a,
                                                                   <vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %b)
@@ -69,8 +75,9 @@ define <vscale x 4 x float> @fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2
 
 define <vscale x 4 x float> @fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fcvtxnt_f32_f64:
-; CHECK: fcvtxnt z0.s, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtxnt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a,
                                                                     <vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
index 8c678135abf75..8fc6d57cf9025 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ;
 ; FLOGB
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x i16> @flogb_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: flogb_f16:
-; CHECK: flogb z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    flogb z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x half> %b)
@@ -17,8 +19,9 @@ define <vscale x 8 x i16> @flogb_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %p
 
 define <vscale x 4 x i32> @flogb_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: flogb_f32:
-; CHECK: flogb z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    flogb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %b)
@@ -27,8 +30,9 @@ define <vscale x 4 x i32> @flogb_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %p
 
 define <vscale x 2 x i64> @flogb_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: flogb_f64:
-; CHECK: flogb z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    flogb z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x double> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
index 44da020403b39..97e6aeaa96e1f 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 4 x float> @fmlalb_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlalb_h:
-; CHECK: fmlalb z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlalb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 8 x half> %b,
                                                                     <vscale x 8 x half> %c)
@@ -21,8 +23,9 @@ define <vscale x 4 x float> @fmlalb_h(<vscale x 4 x float> %a, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmlalb_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlalb_lane_h:
-; CHECK: fmlalb z0.s, z1.h, z2.h[0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlalb z0.s, z1.h, z2.h[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                          <vscale x 8 x half> %b,
                                                                          <vscale x 8 x half> %c,
@@ -36,8 +39,9 @@ define <vscale x 4 x float> @fmlalb_lane_h(<vscale x 4 x float> %a, <vscale x 8
 
 define <vscale x 4 x float> @fmlalt_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlalt_h:
-; CHECK: fmlalt z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlalt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 8 x half> %b,
                                                                     <vscale x 8 x half> %c)
@@ -50,8 +54,9 @@ define <vscale x 4 x float> @fmlalt_h(<vscale x 4 x float> %a, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmlalt_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlalt_lane_h:
-; CHECK: fmlalt z0.s, z1.h, z2.h[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlalt z0.s, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                          <vscale x 8 x half> %b,
                                                                          <vscale x 8 x half> %c,
@@ -65,8 +70,9 @@ define <vscale x 4 x float> @fmlalt_lane_h(<vscale x 4 x float> %a, <vscale x 8
 
 define <vscale x 4 x float> @fmlslb_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlslb_h:
-; CHECK: fmlslb z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlslb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 8 x half> %b,
                                                                     <vscale x 8 x half> %c)
@@ -79,8 +85,9 @@ define <vscale x 4 x float> @fmlslb_h(<vscale x 4 x float> %a, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmlslb_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlslb_lane_h:
-; CHECK: fmlslb z0.s, z1.h, z2.h[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlslb z0.s, z1.h, z2.h[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                          <vscale x 8 x half> %b,
                                                                          <vscale x 8 x half> %c,
@@ -94,8 +101,9 @@ define <vscale x 4 x float> @fmlslb_lane_h(<vscale x 4 x float> %a, <vscale x 8
 
 define <vscale x 4 x float> @fmlslt_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlslt_h:
-; CHECK: fmlslt z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlslt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.nxv4f32(<vscale x 4 x float> %a,
                                                                     <vscale x 8 x half> %b,
                                                                     <vscale x 8 x half> %c)
@@ -108,8 +116,9 @@ define <vscale x 4 x float> @fmlslt_h(<vscale x 4 x float> %a, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmlslt_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
 ; CHECK-LABEL: fmlslt_lane_h:
-; CHECK: fmlslt z0.s, z1.h, z2.h[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmlslt z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.lane.nxv4f32(<vscale x 4 x float> %a,
                                                                          <vscale x 8 x half> %b,
                                                                          <vscale x 8 x half> %c,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
index 2c7b50c28f13c..f7b4790b6af3d 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 2 x i64> @mul_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: mul_lane_d:
-; CHECK: mul z0.d, z0.d, z1.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     i32 1)
@@ -17,8 +19,9 @@ define <vscale x 2 x i64> @mul_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 4 x i32> @mul_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: mul_lane_s:
-; CHECK: mul z0.s, z0.s, z1.s[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 1)
@@ -27,8 +30,9 @@ define <vscale x 4 x i32> @mul_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 8 x i16> @mul_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: mul_lane_h:
-; CHECK: mul z0.h, z0.h, z1.h[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     i32 1)
@@ -41,8 +45,9 @@ define <vscale x 8 x i16> @mul_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 2 x i64> @mla_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: mla_lane_d:
-; CHECK: mla z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mla z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     <vscale x 2 x i64> %c,
@@ -52,8 +57,9 @@ define <vscale x 2 x i64> @mla_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 4 x i32> @mla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: mla_lane_s:
-; CHECK: mla z0.s, z1.s, z2.s[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mla z0.s, z1.s, z2.s[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     <vscale x 4 x i32> %c,
@@ -63,8 +69,9 @@ define <vscale x 4 x i32> @mla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 8 x i16> @mla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: mla_lane_h:
-; CHECK: mla z0.h, z1.h, z2.h[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mla z0.h, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mla.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     <vscale x 8 x i16> %c,
@@ -78,8 +85,9 @@ define <vscale x 8 x i16> @mla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 2 x i64> @mls_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: mls_lane_d:
-; CHECK: mls z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mls z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mls.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     <vscale x 2 x i64> %c,
@@ -89,8 +97,9 @@ define <vscale x 2 x i64> @mls_lane_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 4 x i32> @mls_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: mls_lane_s:
-; CHECK: mls z0.s, z1.s, z2.s[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mls z0.s, z1.s, z2.s[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mls.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     <vscale x 4 x i32> %c,
@@ -100,8 +109,9 @@ define <vscale x 4 x i32> @mls_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 8 x i16> @mls_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: mls_lane_h:
-; CHECK: mls z0.h, z1.h, z2.h[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mls z0.h, z1.h, z2.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mls.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     <vscale x 8 x i16> %c,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
index a15267adc7726..7d506166f7679 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @addp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: addp_i8:
-; CHECK: addp z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addp.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @addp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a
 
 define <vscale x 8 x i16> @addp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: addp_i16:
-; CHECK: addp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @addp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a
 
 define <vscale x 4 x i32> @addp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: addp_i32:
-; CHECK: addp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @addp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a
 
 define <vscale x 2 x i64> @addp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: addp_i64:
-; CHECK: addp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.addp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @addp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a
 
 define <vscale x 8 x half> @faddp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: faddp_f16:
-; CHECK: faddp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -61,8 +67,9 @@ define <vscale x 8 x half> @faddp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @faddp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: faddp_f32:
-; CHECK: faddp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -71,8 +78,9 @@ define <vscale x 4 x float> @faddp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x floa
 
 define <vscale x 2 x double> @faddp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: faddp_f64:
-; CHECK: faddp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -85,8 +93,9 @@ define <vscale x 2 x double> @faddp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x dou
 
 define <vscale x 8 x half> @fmaxp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmaxp_f16:
-; CHECK: fmaxp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxp.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -95,8 +104,9 @@ define <vscale x 8 x half> @fmaxp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @fmaxp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmaxp_f32:
-; CHECK: fmaxp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -105,8 +115,9 @@ define <vscale x 4 x float> @fmaxp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x floa
 
 define <vscale x 2 x double> @fmaxp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmaxp_f64:
-; CHECK: fmaxp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxp.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -119,8 +130,9 @@ define <vscale x 2 x double> @fmaxp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x dou
 
 define <vscale x 8 x half> @fmaxnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fmaxnmp_f16:
-; CHECK: fmaxnmp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnmp.nxv8f16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x half> %a,
                                                                     <vscale x 8 x half> %b)
@@ -129,8 +141,9 @@ define <vscale x 8 x half> @fmaxnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fmaxnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fmaxnmp_f32:
-; CHECK: fmaxnmp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x float> %a,
                                                                      <vscale x 4 x float> %b)
@@ -139,8 +152,9 @@ define <vscale x 4 x float> @fmaxnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fmaxnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fmaxnmp_f64:
-; CHECK: fmaxnmp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnmp.nxv2f64(<vscale x 2 x i1> %pg,
                                                                       <vscale x 2 x double> %a,
                                                                       <vscale x 2 x double> %b)
@@ -153,8 +167,9 @@ define <vscale x 2 x double> @fmaxnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 8 x half> @fminp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fminp_f16:
-; CHECK: fminp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminp.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
@@ -163,8 +178,9 @@ define <vscale x 8 x half> @fminp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half>
 
 define <vscale x 4 x float> @fminp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fminp_f32:
-; CHECK: fminp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
@@ -173,8 +189,9 @@ define <vscale x 4 x float> @fminp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x floa
 
 define <vscale x 2 x double> @fminp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fminp_f64:
-; CHECK: fminp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminp.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
@@ -187,8 +204,9 @@ define <vscale x 2 x double> @fminp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x dou
 
 define <vscale x 8 x half> @fminnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fminnmp_f16:
-; CHECK: fminnmp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1> %pg,
                                                                     <vscale x 8 x half> %a,
                                                                     <vscale x 8 x half> %b)
@@ -197,8 +215,9 @@ define <vscale x 8 x half> @fminnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 
 define <vscale x 4 x float> @fminnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fminnmp_f32:
-; CHECK: fminnmp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1> %pg,
                                                                      <vscale x 4 x float> %a,
                                                                      <vscale x 4 x float> %b)
@@ -207,8 +226,9 @@ define <vscale x 4 x float> @fminnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 
 define <vscale x 2 x double> @fminnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fminnmp_f64:
-; CHECK: fminnmp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1> %pg,
                                                                       <vscale x 2 x double> %a,
                                                                       <vscale x 2 x double> %b)
@@ -221,8 +241,9 @@ define <vscale x 2 x double> @fminnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
 
 define <vscale x 16 x i8> @smaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smaxp_i8:
-; CHECK: smaxp z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smaxp.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -231,8 +252,9 @@ define <vscale x 16 x i8> @smaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @smaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smaxp_i16:
-; CHECK: smaxp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smaxp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -241,8 +263,9 @@ define <vscale x 8 x i16> @smaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @smaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smaxp_i32:
-; CHECK: smaxp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -251,8 +274,9 @@ define <vscale x 4 x i32> @smaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @smaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smaxp_i64:
-; CHECK: smaxp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smaxp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -265,8 +289,9 @@ define <vscale x 2 x i64> @smaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sminp_i8:
-; CHECK: sminp z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sminp.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -275,8 +300,9 @@ define <vscale x 16 x i8> @sminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @sminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sminp_i16:
-; CHECK: sminp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sminp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -285,8 +311,9 @@ define <vscale x 8 x i16> @sminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @sminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sminp_i32:
-; CHECK: sminp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -295,8 +322,9 @@ define <vscale x 4 x i32> @sminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @sminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sminp_i64:
-; CHECK: sminp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sminp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -309,8 +337,9 @@ define <vscale x 2 x i64> @sminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uminp_i8:
-; CHECK: uminp z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uminp.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -319,8 +348,9 @@ define <vscale x 16 x i8> @uminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uminp_i16:
-; CHECK: uminp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uminp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -329,8 +359,9 @@ define <vscale x 8 x i16> @uminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uminp_i32:
-; CHECK: uminp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -339,8 +370,9 @@ define <vscale x 4 x i32> @uminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uminp_i64:
-; CHECK: uminp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uminp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -353,8 +385,9 @@ define <vscale x 2 x i64> @uminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @umaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umaxp_i8:
-; CHECK: umaxp z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umaxp.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -363,8 +396,9 @@ define <vscale x 16 x i8> @umaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @umaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umaxp_i16:
-; CHECK: umaxp z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umaxp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -373,8 +407,9 @@ define <vscale x 8 x i16> @umaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @umaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umaxp_i32:
-; CHECK: umaxp z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -383,8 +418,9 @@ define <vscale x 4 x i32> @umaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @umaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umaxp_i64:
-; CHECK: umaxp z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umaxp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll
index ab570ce29f07b..8b60c5521f434 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-32bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; LDNT1B
 define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1b_s_uxtw:
-; CHECK: ldnt1b { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1b { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -21,8 +23,9 @@ define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vsc
 ; LDNT1H
 define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1h_s_uxtw:
-; CHECK: ldnt1h { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -33,8 +36,9 @@ define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vs
 ; LDNT1W
 define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1w_s_uxtw:
-; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                               i32* %base,
                                                                               <vscale x 4 x i32> %b)
@@ -43,8 +47,9 @@ define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vs
 
 define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1w_s_uxtw_float:
-; CHECK: ldnt1w { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                 float* %base,
                                                                                 <vscale x 4 x i32> %b)
@@ -59,8 +64,9 @@ define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, float*
 ; LDNT1SB
 define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1sb_s_uxtw:
-; CHECK: ldnt1sb { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sb { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             i8* %base,
                                                                             <vscale x 4 x i32> %b)
@@ -71,8 +77,9 @@ define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vs
 ; LDNT1SH
 define <vscale x 4 x i32> @gldnt1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: gldnt1sh_s_uxtw:
-; CHECK: ldnt1sh { z0.s }, p0/z, [z0.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sh { z0.s }, p0/z, [z0.s, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                               i16* %base,
                                                                               <vscale x 4 x i32> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
index 34b24284b1b8d..fd5343af9dd3e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -8,10 +9,11 @@
 ;
 
 define <vscale x 2 x i64> @gldnt1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1h_index
-; CHECK:        lsl z0.d, z0.d, #1
-; CHECK-NEXT:   ldnt1h  { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1h_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #1
+; CHECK-NEXT:    ldnt1h { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                i16* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -20,10 +22,11 @@ define <vscale x 2 x i64> @gldnt1h_index(<vscale x 2 x i1> %pg, i16* %base, <vsc
 }
 
 define <vscale x 2 x i64> @gldnt1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1w_index
-; CHECK:        lsl z0.d, z0.d, #2
-; CHECK-NEXT:   ldnt1w  { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1w_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #2
+; CHECK-NEXT:    ldnt1w { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                i32* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -32,10 +35,11 @@ define <vscale x 2 x i64> @gldnt1w_index(<vscale x 2 x i1> %pg, i32* %base, <vsc
 }
 
 define <vscale x 2 x i64> @gldnt1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1d_index
-; CHECK:        lsl z0.d, z0.d, #3
-; CHECK-NEXT:   ldnt1d  { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1d_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #3
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                i64* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -43,10 +47,11 @@ define <vscale x 2 x i64> @gldnt1d_index(<vscale x 2 x i1> %pg, i64* %base, <vsc
 }
 
 define <vscale x 2 x double> @gldnt1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1d_index_double
-; CHECK:        lsl z0.d, z0.d, #3
-; CHECK-NEXT:   ldnt1d  { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1d_index_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #3
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                   double* %base,
                                                                                   <vscale x 2 x i64> %b)
@@ -61,10 +66,11 @@ define <vscale x 2 x double> @gldnt1d_index_double(<vscale x 2 x i1> %pg, double
 ;
 
 define <vscale x 2 x i64> @gldnt1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1sh_index
-; CHECK:        lsl z0.d, z0.d, #1
-; CHECK-NEXT:   ldnt1sh { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1sh_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #1
+; CHECK-NEXT:    ldnt1sh { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                i16* %base,
                                                                                <vscale x 2 x i64> %b)
@@ -73,10 +79,11 @@ define <vscale x 2 x i64> @gldnt1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vs
 }
 
 define <vscale x 2 x i64> @gldnt1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: gldnt1sw_index
-; CHECK:        lsl z0.d, z0.d, #2
-; CHECK-NEXT:   ldnt1sw { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: gldnt1sw_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #2
+; CHECK-NEXT:    ldnt1sw { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                i32* %base,
                                                                                <vscale x 2 x i64> %b)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll
index bc1e9616f6b8d..d0df22a3902e3 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -7,8 +8,9 @@
 
 define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1b_d:
-; CHECK: ldnt1b { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1b { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                        i8* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -18,8 +20,9 @@ define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x
 
 define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1h_d:
-; CHECK: ldnt1h { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                          i16* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -29,8 +32,9 @@ define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale
 
 define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gldnt1w_d:
-; CHECK: ldnt1w { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                          i32* %base,
                                                                          <vscale x 2 x i64> %offsets)
@@ -40,8 +44,9 @@ define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale
 
 define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1d_d:
-; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> %pg,
                                                                          i64* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -50,8 +55,9 @@ define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale
 
 define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1d_d_double:
-; CHECK: ldnt1d { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> %pg,
                                                                             double* %base,
                                                                             <vscale x 2 x i64> %b)
@@ -65,8 +71,9 @@ define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, double* %b
 
 define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1sb_d:
-; CHECK: ldnt1sb { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sb { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> %pg,
                                                                        i8* %base,
                                                                        <vscale x 2 x i64> %b)
@@ -76,8 +83,9 @@ define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale
 
 define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: gldnt1sh_d:
-; CHECK: ldnt1sh { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sh { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> %pg,
                                                                          i16* %base,
                                                                          <vscale x 2 x i64> %b)
@@ -87,8 +95,9 @@ define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale
 
 define <vscale x 2 x i64> @gldnt1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
 ; CHECK-LABEL: gldnt1sw_d:
-; CHECK: ldnt1sw { z0.d }, p0/z, [z0.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sw { z0.d }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> %pg,
                                                                          i32* %base,
                                                                          <vscale x 2 x i64> %offsets)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll
index 65d33657a0088..402dd595783c7 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-vector-base-scalar-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -8,7 +9,8 @@
 ; LDNT1B
 define <vscale x 4 x i32> @gldnt1b_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1b_s:
-; CHECK:    ldnt1b { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1b { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
@@ -19,7 +21,8 @@ define <vscale x 4 x i32> @gldnt1b_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1b_d:
-; CHECK:    ldnt1b { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1b { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
@@ -31,7 +34,8 @@ define <vscale x 2 x i64> @gldnt1b_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 ; LDNT1H
 define <vscale x 4 x i32> @gldnt1h_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1h_s:
-; CHECK:    ldnt1h { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv416.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
@@ -42,7 +46,8 @@ define <vscale x 4 x i32> @gldnt1h_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1h_d:
-; CHECK:    ldnt1h { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1h { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
@@ -54,7 +59,8 @@ define <vscale x 2 x i64> @gldnt1h_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 ; LDNT1W
 define <vscale x 4 x i32> @gldnt1w_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1w_s:
-; CHECK:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                <vscale x 4 x i32> %base,
@@ -64,7 +70,8 @@ define <vscale x 4 x i32> @gldnt1w_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 4 x float> @gldnt1w_s_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1w_s_float:
-; CHECK:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                  <vscale x 4 x i32> %base,
@@ -74,7 +81,8 @@ define <vscale x 4 x float> @gldnt1w_s_float(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1w_d:
-; CHECK:    ldnt1w { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1w { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
@@ -86,7 +94,8 @@ define <vscale x 2 x i64> @gldnt1w_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 ; LDNT1D
 define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1d_d:
-; CHECK:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
@@ -97,7 +106,8 @@ define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 ; LDNT1D
 define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1d_d_double:
-; CHECK:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                   <vscale x 2 x i64> %base,
@@ -113,7 +123,8 @@ define <vscale x 2 x double> @gldnt1d_d_double(<vscale x 2 x i1> %pg, <vscale x
 ; LDNT1SB
 define <vscale x 4 x i32> @gldnt1sb_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1sb_s:
-; CHECK:    ldnt1sb { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sb { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                              <vscale x 4 x i32> %base,
@@ -124,7 +135,8 @@ define <vscale x 4 x i32> @gldnt1sb_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1sb_d:
-; CHECK:    ldnt1sb { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sb { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                              <vscale x 2 x i64> %base,
@@ -136,7 +148,8 @@ define <vscale x 2 x i64> @gldnt1sb_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ; LDNT1SH
 define <vscale x 4 x i32> @gldnt1sh_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1sh_s:
-; CHECK:    ldnt1sh { z0.s }, p0/z, [z0.s, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sh { z0.s }, p0/z, [z0.s, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv416.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
@@ -147,7 +160,8 @@ define <vscale x 4 x i32> @gldnt1sh_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1sh_d:
-; CHECK:    ldnt1sh { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sh { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
@@ -159,7 +173,8 @@ define <vscale x 2 x i64> @gldnt1sh_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ; LDNT1SW
 define <vscale x 2 x i64> @gldnt1sw_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: gldnt1sw_d:
-; CHECK:    ldnt1sw { z0.d }, p0/z, [z0.d, x0]
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldnt1sw { z0.d }, p0/z, [z0.d, x0]
 ; CHECK-NEXT:    ret
   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll
index 56836bad61932..876c5b44e2960 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-32bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -9,8 +10,9 @@
 ; STNT1B
 define void @sstnt1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sstnt1b_s_uxtw:
-; CHECK: stnt1b { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1b { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void  @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
                                                          <vscale x 4 x i1> %pg,
@@ -22,8 +24,9 @@ define void @sstnt1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8*
 ; STNT1H
 define void @sstnt1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sstnt1h_s_uxtw:
-; CHECK: stnt1h { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
                                                          <vscale x 4 x i1> %pg,
@@ -35,8 +38,9 @@ define void @sstnt1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16
 ; STNT1W
 define void @sstnt1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sstnt1w_s_uxtw:
-; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data,
                                                          <vscale x 4 x i1> %pg,
                                                          i32* %base,
@@ -46,8 +50,9 @@ define void @sstnt1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32
 
 define void @sstnt1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
 ; CHECK-LABEL: sstnt1w_s_uxtw_float:
-; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT:	ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data,
                                                          <vscale x 4 x i1> %pg,
                                                          float* %base,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-scaled-offset.ll
index fc14d20822b91..4f47a4b7bdea1 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-scaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -8,10 +9,11 @@
 ;
 
 define void @sstnt1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sstnt1h_index
-; CHECK:        lsl z1.d, z1.d, #1
-; CHECK-NEXT:   stnt1h  { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: sstnt1h_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.d, z1.d, #1
+; CHECK-NEXT:    stnt1h { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                           <vscale x 2 x i1> %pg,
@@ -21,10 +23,11 @@ define void @sstnt1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16*
 }
 
 define void @sstnt1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sstnt1w_index
-; CHECK:        lsl z1.d, z1.d, #2
-; CHECK-NEXT:   stnt1w  { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: sstnt1w_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.d, z1.d, #2
+; CHECK-NEXT:    stnt1w { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                           <vscale x 2 x i1> %pg,
@@ -34,10 +37,11 @@ define void @sstnt1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32*
 }
 
 define void  @sstnt1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sstnt1d_index
-; CHECK:        lsl z1.d, z1.d, #3
-; CHECK-NEXT:   stnt1d  { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: sstnt1d_index:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.d, z1.d, #3
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> %data,
                                                           <vscale x 2 x i1> %pg,
                                                           i64* %base,
@@ -46,10 +50,11 @@ define void  @sstnt1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64
 }
 
 define void  @sstnt1d_index_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offsets) {
-; CHECK-LABEL: sstnt1d_index_double
-; CHECK:        lsl z1.d, z1.d, #3
-; CHECK-NEXT:   stnt1d  { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT:   ret
+; CHECK-LABEL: sstnt1d_index_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.d, z1.d, #3
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> %data,
                                                           <vscale x 2 x i1> %pg,
                                                           double* %base,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll
index 6cf4d5d90518b..a83bbb8454b6c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-64bit-unscaled-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -7,8 +8,9 @@
 
 define void @sstnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sstnt1b_d:
-; CHECK: stnt1b { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1b { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> %data_trunc,
                                                    <vscale x 2 x i1> %pg,
@@ -19,8 +21,9 @@ define void @sstnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %bas
 
 define void @sstnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sstnt1h_d:
-; CHECK: stnt1h { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
                                                     <vscale x 2 x i1> %pg,
@@ -31,8 +34,9 @@ define void @sstnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %ba
 
 define void @sstnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sstnt1w_d:
-; CHECK: stnt1w { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> %data_trunc,
                                                     <vscale x 2 x i1> %pg,
@@ -43,8 +47,9 @@ define void @sstnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %ba
 
 define void @sstnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sstnt1d_d:
-; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> %data,
                                                     <vscale x 2 x i1> %pg,
                                                     i64* %base,
@@ -54,8 +59,9 @@ define void @sstnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %ba
 
 define void @sstnt1d_d_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sstnt1d_d_double:
-; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> %data,
                                                     <vscale x 2 x i1> %pg,
                                                     double* %base,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll
index 26d84feda33c5..096d802c21562 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-scatter-stores-vector-base-scalar-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
@@ -8,8 +9,9 @@
 ; STNT1B
 define void @stnt1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1b_s:
-; CHECK: stnt1b { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1b { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
                                                                          <vscale x 4 x i1> %pg,
@@ -20,8 +22,9 @@ define void @stnt1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x
 
 define void @stnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1b_d:
-; CHECK: stnt1b { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1b { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
                                                                          <vscale x 2 x i1> %pg,
@@ -33,8 +36,9 @@ define void @stnt1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x
 ; STNT1H
 define void @stnt1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1h_s:
-; CHECK: stnt1h { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
                                                                           <vscale x 4 x i1> %pg,
@@ -45,8 +49,9 @@ define void @stnt1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x
 
 define void @stnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1h_d:
-; CHECK: stnt1h { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1h { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
                                                                           <vscale x 2 x i1> %pg,
@@ -58,8 +63,9 @@ define void @stnt1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x
 ; STNT1W
 define void @stnt1w_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1w_s:
-; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
                                                                           <vscale x 4 x i1> %pg,
                                                                           <vscale x 4 x i32> %base,
@@ -69,8 +75,9 @@ define void @stnt1w_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x
 
 define void @stnt1w_f32_s(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1w_f32_s:
-; CHECK: stnt1w { z0.s }, p0, [z1.s, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
                                                                           <vscale x 4 x i1> %pg,
                                                                           <vscale x 4 x i32> %base,
@@ -80,8 +87,9 @@ define void @stnt1w_f32_s(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vs
 
 define void @stnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1w_d:
-; CHECK: stnt1w { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1w { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   %data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
                                                                           <vscale x 2 x i1> %pg,
@@ -93,8 +101,9 @@ define void @stnt1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x
 ; STNT1D
 define void @stnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1d_d:
-; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
                                                                           <vscale x 2 x i1> %pg,
                                                                           <vscale x 2 x i64> %base,
@@ -104,8 +113,9 @@ define void @stnt1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x
 
 define void @stnt1d_f64_d(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 ; CHECK-LABEL: stnt1d_f64_d:
-; CHECK: stnt1d { z0.d }, p0, [z1.d, x0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stnt1d { z0.d }, p0, [z1.d, x0]
+; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
                                                                           <vscale x 2 x i1> %pg,
                                                                           <vscale x 2 x i64> %base,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
index f9c8ec91d3085..9fd1eb616c28c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
@@ -1,99 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; TBL2
 ;
 
-define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unused,
-                                  <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: tbl2_b:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.b, { z1.b, z2.b }, z3.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.b, { z1.b, z2.b }, z3.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %out
 }
 
-define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unused,
-                                  <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_h:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %out
 }
 
-define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unused,
-                                  <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_s:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %out
 }
 
-define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unused,
-                                  <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_d:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %out
 }
 
-define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half> %unused,
-                                    <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half> %unused, <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_fh:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
                                                                  <vscale x 8 x i16> %c)
   ret <vscale x 8 x half> %out
 }
 
-define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %unused,
-                                        <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
+define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: tbl2_bf16:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b,
                                                                     <vscale x 8 x i16> %c)
   ret <vscale x 8 x bfloat> %out
 }
 
-define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x float> %unused,
-                                     <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x float> %unused, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_fs:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
                                                                   <vscale x 4 x i32> %c)
   ret <vscale x 4 x float> %out
 }
 
-define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x double> %unused,
-                                      <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x double> %unused, <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_fd:
-; CHECK: mov z1.d, z0.d
-; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,
                                                                    <vscale x 2 x i64> %c)
@@ -106,8 +115,9 @@ define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x do
 
 define <vscale x 16 x i8> @tbx_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: tbx_b:
-; CHECK: tbx z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbx.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                <vscale x 16 x i8> %c)
@@ -116,8 +126,9 @@ define <vscale x 16 x i8> @tbx_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <
 
 define <vscale x 8 x i16> @tbx_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbx_h:
-; CHECK: tbx z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbx.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                <vscale x 8 x i16> %c)
@@ -126,8 +137,9 @@ define <vscale x 8 x i16> @tbx_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <
 
 define <vscale x 8 x half> @ftbx_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: ftbx_h:
-; CHECK: tbx z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbx.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b,
                                                                 <vscale x 8 x i16> %c)
@@ -136,8 +148,9 @@ define <vscale x 8 x half> @ftbx_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
 
 define <vscale x 8 x bfloat> @ftbx_h_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: ftbx_h_bf16:
-; CHECK: tbx z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b,
                                                                    <vscale x 8 x i16> %c)
@@ -146,8 +159,9 @@ define <vscale x 8 x bfloat> @ftbx_h_bf16(<vscale x 8 x bfloat> %a, <vscale x 8
 
 define <vscale x 4 x i32> @tbx_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbx_s:
-; CHECK: tbx z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbx.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                <vscale x 4 x i32> %c)
@@ -156,8 +170,9 @@ define <vscale x 4 x i32> @tbx_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <
 
 define <vscale x 4 x float> @ftbx_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: ftbx_s:
-; CHECK: tbx z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbx.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b,
                                                                  <vscale x 4 x i32> %c)
@@ -166,8 +181,9 @@ define <vscale x 4 x float> @ftbx_s(<vscale x 4 x float> %a, <vscale x 4 x float
 
 define <vscale x 2 x i64> @tbx_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbx_d:
-; CHECK: tbx z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbx.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                <vscale x 2 x i64> %c)
@@ -176,8 +192,9 @@ define <vscale x 2 x i64> @tbx_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <
 
 define <vscale x 2 x double> @ftbx_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: ftbx_d:
-; CHECK: tbx z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbx.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b,
                                                                   <vscale x 2 x i64> %c)
@@ -207,4 +224,4 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.tbx.nxv2f64(<vscale x 2 x double
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i16>)
 
 ; +bf16 is required for the bfloat version.
-attributes #0 = { "target-features"="+sve,+bf16" }
+attributes #0 = { "target-features"="+sve2,+bf16" }

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
index 21df257a269cc..f695fd444be77 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic-128.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes < %s | FileCheck %s
 
 ;
 ; PMULLB
@@ -6,8 +7,9 @@
 
 define <vscale x 2 x i64> @pmullb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: pmullb_i64:
-; CHECK: pmullb z0.q, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullb z0.q, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.pmullb.pair.nxv2i64(<vscale x 2 x i64> %a,
                                                                        <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -19,8 +21,9 @@ define <vscale x 2 x i64> @pmullb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @pmullt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: pmullt_i64:
-; CHECK: pmullt z0.q, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullt z0.q, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.pmullt.pair.nxv2i64(<vscale x 2 x i64> %a,
                                                                        <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
index 8b725f43d20e4..ba895226cfc64 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 ;
 ; EORBT
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @eorbt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: eorbt_i8:
-; CHECK: eorbt z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorbt z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @eorbt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 8 x i16> @eorbt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: eorbt_i16:
-; CHECK: eorbt z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorbt z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eorbt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @eorbt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @eorbt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: eorbt_i32:
-; CHECK: eorbt z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorbt z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eorbt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @eorbt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @eorbt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: eorbt_i64:
-; CHECK: eorbt z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorbt z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eorbt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @eorbt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @eortb_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: eortb_i8:
-; CHECK: eortb z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eortb z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eortb.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
@@ -61,8 +67,9 @@ define <vscale x 16 x i8> @eortb_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 8 x i16> @eortb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: eortb_i16:
-; CHECK: eortb z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eortb z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eortb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
@@ -71,8 +78,9 @@ define <vscale x 8 x i16> @eortb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @eortb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: eortb_i32:
-; CHECK: eortb z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eortb z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eortb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
@@ -81,8 +89,9 @@ define <vscale x 4 x i32> @eortb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @eortb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: eortb_i64:
-; CHECK: eortb z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eortb z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eortb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c)
@@ -95,8 +104,9 @@ define <vscale x 2 x i64> @eortb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @pmullb_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: pmullb_i8:
-; CHECK: pmullb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %a,
                                                                        <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -104,8 +114,9 @@ define <vscale x 16 x i8> @pmullb_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 4 x i32> @pmullb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: pmullb_i32:
-; CHECK: pmullb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -117,8 +128,9 @@ define <vscale x 4 x i32> @pmullb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 16 x i8> @pmullt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: pmullt_i8:
-; CHECK: pmullt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullt.pair.nxv16i8(<vscale x 16 x i8> %a,
                                                                        <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -126,8 +138,9 @@ define <vscale x 16 x i8> @pmullt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 4 x i32> @pmullt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: pmullt_i32:
-; CHECK: pmullt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmullt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullt.pair.nxv4i32(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll
index 4d16a00f07bdd..8896322f4cab5 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,24 +8,27 @@
 
 define <vscale x 16 x i8> @sqxtnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqxtnb_h:
-; CHECK: sqxtnb z0.b, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnb z0.b, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqxtnb.nxv8i16(<vscale x 8 x i16> %a)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @sqxtnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqxtnb_s:
-; CHECK: sqxtnb z0.h, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnb z0.h, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqxtnb.nxv4i32(<vscale x 4 x i32> %a)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sqxtnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqxtnb_d:
-; CHECK: sqxtnb z0.s, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnb z0.s, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqxtnb.nxv2i64(<vscale x 2 x i64> %a)
   ret <vscale x 4 x i32> %out
 }
@@ -35,24 +39,27 @@ define <vscale x 4 x i32> @sqxtnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @uqxtnb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqxtnb_h:
-; CHECK: uqxtnb z0.b, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnb z0.b, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqxtnb.nxv8i16(<vscale x 8 x i16> %a)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @uqxtnb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqxtnb_s:
-; CHECK: uqxtnb z0.h, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnb z0.h, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqxtnb.nxv4i32(<vscale x 4 x i32> %a)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @uqxtnb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqxtnb_d:
-; CHECK: uqxtnb z0.s, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnb z0.s, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqxtnb.nxv2i64(<vscale x 2 x i64> %a)
   ret <vscale x 4 x i32> %out
 }
@@ -63,24 +70,27 @@ define <vscale x 4 x i32> @uqxtnb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @sqxtunb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqxtunb_h:
-; CHECK: sqxtunb z0.b, z0.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunb z0.b, z0.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqxtunb.nxv8i16(<vscale x 8 x i16> %a)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @sqxtunb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqxtunb_s:
-; CHECK: sqxtunb z0.h, z0.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunb z0.h, z0.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqxtunb.nxv4i32(<vscale x 4 x i32> %a)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sqxtunb_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqxtunb_d:
-; CHECK: sqxtunb z0.s, z0.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunb z0.s, z0.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqxtunb.nxv2i64(<vscale x 2 x i64> %a)
   ret <vscale x 4 x i32> %out
 }
@@ -91,28 +101,31 @@ define <vscale x 4 x i32> @sqxtunb_d(<vscale x 2 x i64> %a) {
 
 define <vscale x 16 x i8> @sqxtnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqxtnt_h:
-; CHECK: sqxtnt z0.b, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnt z0.b, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqxtnt.nxv8i16(<vscale x 16 x i8> %a,
-                                                             <vscale x 8 x i16> %b)
+                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @sqxtnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqxtnt_s:
-; CHECK: sqxtnt z0.h, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnt z0.h, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqxtnt.nxv4i32(<vscale x 8 x i16> %a,
-                                                             <vscale x 4 x i32> %b)
+                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sqxtnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqxtnt_d:
-; CHECK: sqxtnt z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtnt z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqxtnt.nxv2i64(<vscale x 4 x i32> %a,
-                                                             <vscale x 2 x i64> %b)
+                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -122,28 +135,31 @@ define <vscale x 4 x i32> @sqxtnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @uqxtnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqxtnt_h:
-; CHECK: uqxtnt z0.b, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnt z0.b, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqxtnt.nxv8i16(<vscale x 16 x i8> %a,
-                                                             <vscale x 8 x i16> %b)
+                                                                  <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @uqxtnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqxtnt_s:
-; CHECK: uqxtnt z0.h, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnt z0.h, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqxtnt.nxv4i32(<vscale x 8 x i16> %a,
-                                                             <vscale x 4 x i32> %b)
+                                                                  <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @uqxtnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqxtnt_d:
-; CHECK: uqxtnt z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtnt z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqxtnt.nxv2i64(<vscale x 4 x i32> %a,
-                                                             <vscale x 2 x i64> %b)
+                                                                  <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -153,28 +169,31 @@ define <vscale x 4 x i32> @uqxtnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @sqxtunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqxtunt_h:
-; CHECK: sqxtunt z0.b, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunt z0.b, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqxtunt.nxv8i16(<vscale x 16 x i8> %a,
-                                                              <vscale x 8 x i16> %b)
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 16 x i8> %out
 }
 
 define <vscale x 8 x i16> @sqxtunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqxtunt_s:
-; CHECK: sqxtunt z0.h, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunt z0.h, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqxtunt.nxv4i32(<vscale x 8 x i16> %a,
-                                                              <vscale x 4 x i32> %b)
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sqxtunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqxtunt_d:
-; CHECK: sqxtunt z0.s, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtunt z0.s, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqxtunt.nxv2i64(<vscale x 4 x i32> %a,
-                                                              <vscale x 2 x i64> %b)
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 4 x i32> %out
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
index 70bd77994a6d5..7ae5f883ca112 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cadd_b:
-; CHECK: cadd z0.b, z0.b, z1.b, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   i32 90)
@@ -17,8 +19,9 @@ define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cadd_h:
-; CHECK: cadd z0.h, z0.h, z1.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   i32 90)
@@ -27,8 +30,9 @@ define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cadd_s:
-; CHECK: cadd z0.s, z0.s, z1.s, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   i32 270)
@@ -37,8 +41,9 @@ define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cadd_d:
-; CHECK: cadd z0.d, z0.d, z1.d, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   i32 270)
@@ -51,8 +56,9 @@ define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqcadd_b:
-; CHECK: sqcadd z0.b, z0.b, z1.b, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqcadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b,
                                                                     i32 90)
@@ -61,8 +67,9 @@ define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqcadd_h:
-; CHECK: sqcadd z0.h, z0.h, z1.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqcadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     i32 90)
@@ -71,8 +78,9 @@ define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqcadd_s:
-; CHECK: sqcadd z0.s, z0.s, z1.s, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqcadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     i32 270)
@@ -81,8 +89,9 @@ define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqcadd_d:
-; CHECK: sqcadd z0.d, z0.d, z1.d, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqcadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     i32 270)
@@ -95,8 +104,9 @@ define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: cmla_b:
-; CHECK: cmla z0.b, z1.b, z2.b, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.b, z1.b, z2.b, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c,
@@ -106,8 +116,9 @@ define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
 
 define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: cmla_h:
-; CHECK: cmla z0.h, z1.h, z2.h, #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.h, z1.h, z2.h, #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c,
@@ -117,8 +128,9 @@ define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
 
 define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: cmla_s:
-; CHECK: cmla z0.s, z1.s, z2.s, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.s, z1.s, z2.s, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c,
@@ -128,8 +140,9 @@ define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
 
 define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: cmla_d:
-; CHECK: cmla z0.d, z1.d, z2.d, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.d, z1.d, z2.d, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   <vscale x 2 x i64> %c,
@@ -143,8 +156,9 @@ define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b,
 
 define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: cmla_lane_h:
-; CHECK: cmla z0.h, z1.h, z2.h[1], #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.h, z1.h, z2.h[1], #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        <vscale x 8 x i16> %c,
@@ -155,8 +169,9 @@ define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: cmla_lane_s:
-; CHECK: cmla z0.s, z1.s, z2.s[0], #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmla z0.s, z1.s, z2.s[0], #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        <vscale x 4 x i32> %c,
@@ -171,8 +186,9 @@ define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sqrdcmlah_b:
-; CHECK: sqrdcmlah z0.b, z1.b, z2.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.b, z1.b, z2.b, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                        <vscale x 16 x i8> %b,
                                                                        <vscale x 16 x i8> %c,
@@ -182,8 +198,9 @@ define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdcmlah_h:
-; CHECK: sqrdcmlah z0.h, z1.h, z2.h, #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h, #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        <vscale x 8 x i16> %c,
@@ -193,8 +210,9 @@ define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdcmlah_s:
-; CHECK: sqrdcmlah z0.s, z1.s, z2.s, #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s, #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        <vscale x 4 x i32> %c,
@@ -204,8 +222,9 @@ define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sqrdcmlah_d:
-; CHECK: sqrdcmlah z0.d, z1.d, z2.d, #270
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.d, z1.d, z2.d, #270
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                        <vscale x 2 x i64> %b,
                                                                        <vscale x 2 x i64> %c,
@@ -219,8 +238,9 @@ define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdcmlah_lane_h:
-; CHECK: sqrdcmlah z0.h, z1.h, z2.h[1], #90
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h[1], #90
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                             <vscale x 8 x i16> %b,
                                                                             <vscale x 8 x i16> %c,
@@ -231,8 +251,9 @@ define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x
 
 define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdcmlah_lane_s:
-; CHECK: sqrdcmlah z0.s, z1.s, z2.s[0], #180
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s[0], #180
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                             <vscale x 4 x i32> %b,
                                                                             <vscale x 4 x i32> %c,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
index 60bd2e167271f..dfb9d7d831b17 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
 
 ;
 ; SQSHLU
@@ -6,9 +7,10 @@
 
 define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqshlu_i8:
-; CHECK:      movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: sqshlu z0.b, p0/m, z0.b, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a_z,
@@ -18,9 +20,10 @@ define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqshlu_i16:
-; CHECK:      movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: sqshlu z0.h, p0/m, z0.h, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #3
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a_z,
@@ -30,9 +33,10 @@ define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshlu_i32:
-; CHECK:      movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: sqshlu z0.s, p0/m, z0.s, #29
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #29
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a_z,
@@ -42,9 +46,10 @@ define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshlu_i64:
-; CHECK:      movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: sqshlu z0.d, p0/m, z0.d, #62
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #62
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a_z,

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
index 9afdb48c053e9..1b6873e84b09e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; SABA
@@ -6,8 +7,9 @@
 
 define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: saba_i8:
-; CHECK: saba z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %c)
@@ -16,8 +18,9 @@ define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
 
 define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: saba_i16:
-; CHECK: saba z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %c)
@@ -26,8 +29,9 @@ define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: saba_i32:
-; CHECK: saba z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %c)
@@ -36,8 +40,9 @@ define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: saba_i64:
-; CHECK: saba z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %c)
@@ -50,8 +55,9 @@ define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @shadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: shadd_i8:
-; CHECK: shadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -60,8 +66,9 @@ define <vscale x 16 x i8> @shadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @shadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: shadd_i16:
-; CHECK: shadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -70,8 +77,9 @@ define <vscale x 8 x i16> @shadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @shadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: shadd_i32:
-; CHECK: shadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -80,8 +88,9 @@ define <vscale x 4 x i32> @shadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @shadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: shadd_i64:
-; CHECK: shadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -94,8 +103,9 @@ define <vscale x 2 x i64> @shadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: shsub_i8:
-; CHECK: shsub z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -104,8 +114,9 @@ define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: shsub_i16:
-; CHECK: shsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -114,8 +125,9 @@ define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: shsub_i32:
-; CHECK: shsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -124,8 +136,9 @@ define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: shsub_i64:
-; CHECK: shsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -138,8 +151,9 @@ define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @shsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: shsubr_i8:
-; CHECK: shsubr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -148,8 +162,9 @@ define <vscale x 16 x i8> @shsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @shsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: shsubr_i16:
-; CHECK: shsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -158,8 +173,9 @@ define <vscale x 8 x i16> @shsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @shsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: shsubr_i32:
-; CHECK: shsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -168,8 +184,9 @@ define <vscale x 4 x i32> @shsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: shsubr_i64:
-; CHECK: shsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -182,8 +199,9 @@ define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sli_i8:
-; CHECK: sli z0.b, z1.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sli z0.b, z1.b, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 0)
@@ -192,8 +210,9 @@ define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sli_i16:
-; CHECK: sli z0.h, z1.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sli z0.h, z1.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 1)
@@ -202,8 +221,9 @@ define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sli_i32:
-; CHECK: sli z0.s, z1.s, #30
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sli z0.s, z1.s, #30
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 30);
@@ -212,8 +232,9 @@ define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sli_i64:
-; CHECK: sli z0.d, z1.d, #63
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sli z0.d, z1.d, #63
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 63)
@@ -226,8 +247,9 @@ define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 16 x i8> @sqabs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqabs_i8:
-; CHECK: sqabs z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b)
@@ -236,8 +258,9 @@ define <vscale x 16 x i8> @sqabs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %p
 
 define <vscale x 8 x i16> @sqabs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqabs_i16:
-; CHECK: sqabs z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b)
@@ -246,8 +269,9 @@ define <vscale x 8 x i16> @sqabs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %p
 
 define <vscale x 4 x i32> @sqabs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqabs_i32:
-; CHECK: sqabs z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
@@ -256,8 +280,9 @@ define <vscale x 4 x i32> @sqabs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %p
 
 define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqabs_i64:
-; CHECK: sqabs z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -270,8 +295,9 @@ define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %p
 
 define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqadd_i8:
-; CHECK: sqadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -280,8 +306,9 @@ define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqadd_i16:
-; CHECK: sqadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -290,8 +317,9 @@ define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqadd_i32:
-; CHECK: sqadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -300,8 +328,9 @@ define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqadd_i64:
-; CHECK: sqadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -314,8 +343,9 @@ define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqdmulh_i8:
-; CHECK: sqdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -323,8 +353,9 @@ define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmulh_i16:
-; CHECK: sqdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -332,8 +363,9 @@ define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmulh_i32:
-; CHECK: sqdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -341,8 +373,9 @@ define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqdmulh_i64:
-; CHECK: sqdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -354,8 +387,9 @@ define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 8 x i16> @sqdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmulh_lane_i16:
-; CHECK: sqdmulh z0.h, z0.h, z1.h[7]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h[7]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 7)
@@ -364,8 +398,9 @@ define <vscale x 8 x i16> @sqdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 
 define <vscale x 4 x i32> @sqdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmulh_lane_i32:
-; CHECK: sqdmulh z0.s, z0.s, z1.s[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 3);
@@ -374,8 +409,9 @@ define <vscale x 4 x i32> @sqdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 
 define <vscale x 2 x i64> @sqdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqdmulh_lane_i64:
-; CHECK: sqdmulh z0.d, z0.d, z1.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                         <vscale x 2 x i64> %b,
                                                                         i32 1)
@@ -388,8 +424,9 @@ define <vscale x 2 x i64> @sqdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x
 
 define <vscale x 16 x i8> @sqneg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqneg_i8:
-; CHECK: sqneg z0.b, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b)
@@ -398,8 +435,9 @@ define <vscale x 16 x i8> @sqneg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %p
 
 define <vscale x 8 x i16> @sqneg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqneg_i16:
-; CHECK: sqneg z0.h, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b)
@@ -408,8 +446,9 @@ define <vscale x 8 x i16> @sqneg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %p
 
 define <vscale x 4 x i32> @sqneg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqneg_i32:
-; CHECK: sqneg z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
@@ -418,8 +457,9 @@ define <vscale x 4 x i32> @sqneg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %p
 
 define <vscale x 2 x i64> @sqneg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqneg_i64:
-; CHECK: sqneg z0.d, p0/m, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b)
@@ -432,8 +472,9 @@ define <vscale x 2 x i64> @sqneg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %p
 
 define <vscale x 16 x i8> @sqrdmlah_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sqrdmlah_i8:
-; CHECK: sqrdmlah z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b,
                                                                     <vscale x 16 x i8> %c)
@@ -442,8 +483,9 @@ define <vscale x 16 x i8> @sqrdmlah_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrdmlah_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdmlah_i16:
-; CHECK: sqrdmlah z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     <vscale x 8 x i16> %c)
@@ -452,8 +494,9 @@ define <vscale x 8 x i16> @sqrdmlah_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @sqrdmlah_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdmlah_i32:
-; CHECK: sqrdmlah z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     <vscale x 4 x i32> %c)
@@ -462,8 +505,9 @@ define <vscale x 4 x i32> @sqrdmlah_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @sqrdmlah_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sqrdmlah_i64:
-; CHECK: sqrdmlah z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     <vscale x 2 x i64> %c)
@@ -476,8 +520,9 @@ define <vscale x 2 x i64> @sqrdmlah_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
 
 define <vscale x 8 x i16> @sqrdmlah_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdmlah_lane_i16:
-; CHECK: sqrdmlah z0.h, z1.h, z2.h[5]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h[5]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                          <vscale x 8 x i16> %b,
                                                                          <vscale x 8 x i16> %c,
@@ -487,8 +532,9 @@ define <vscale x 8 x i16> @sqrdmlah_lane_i16(<vscale x 8 x i16> %a, <vscale x 8
 
 define <vscale x 4 x i32> @sqrdmlah_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdmlah_lane_i32:
-; CHECK: sqrdmlah z0.s, z1.s, z2.s[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                          <vscale x 4 x i32> %b,
                                                                          <vscale x 4 x i32> %c,
@@ -498,8 +544,9 @@ define <vscale x 4 x i32> @sqrdmlah_lane_i32(<vscale x 4 x i32> %a, <vscale x 4
 
 define <vscale x 2 x i64> @sqrdmlah_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sqrdmlah_lane_i64:
-; CHECK: sqrdmlah z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                          <vscale x 2 x i64> %b,
                                                                          <vscale x 2 x i64> %c,
@@ -513,8 +560,9 @@ define <vscale x 2 x i64> @sqrdmlah_lane_i64(<vscale x 2 x i64> %a, <vscale x 2
 
 define <vscale x 16 x i8> @sqrdmlsh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sqrdmlsh_i8:
-; CHECK: sqrdmlsh z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b,
                                                                     <vscale x 16 x i8> %c)
@@ -523,8 +571,9 @@ define <vscale x 16 x i8> @sqrdmlsh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrdmlsh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdmlsh_i16:
-; CHECK: sqrdmlsh z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     <vscale x 8 x i16> %c)
@@ -533,8 +582,9 @@ define <vscale x 8 x i16> @sqrdmlsh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @sqrdmlsh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdmlsh_i32:
-; CHECK: sqrdmlsh z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b,
                                                                     <vscale x 4 x i32> %c)
@@ -543,8 +593,9 @@ define <vscale x 4 x i32> @sqrdmlsh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @sqrdmlsh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sqrdmlsh_i64:
-; CHECK: sqrdmlsh z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b,
                                                                     <vscale x 2 x i64> %c)
@@ -557,8 +608,9 @@ define <vscale x 2 x i64> @sqrdmlsh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
 
 define <vscale x 8 x i16> @sqrdmlsh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sqrdmlsh_lane_i16:
-; CHECK: sqrdmlsh z0.h, z1.h, z2.h[4]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h[4]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                          <vscale x 8 x i16> %b,
                                                                          <vscale x 8 x i16> %c,
@@ -568,8 +620,9 @@ define <vscale x 8 x i16> @sqrdmlsh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8
 
 define <vscale x 4 x i32> @sqrdmlsh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sqrdmlsh_lane_i32:
-; CHECK: sqrdmlsh z0.s, z1.s, z2.s[0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                          <vscale x 4 x i32> %b,
                                                                          <vscale x 4 x i32> %c,
@@ -579,8 +632,9 @@ define <vscale x 4 x i32> @sqrdmlsh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4
 
 define <vscale x 2 x i64> @sqrdmlsh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sqrdmlsh_lane_i64:
-; CHECK: sqrdmlsh z0.d, z1.d, z2.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                          <vscale x 2 x i64> %b,
                                                                          <vscale x 2 x i64> %c,
@@ -594,8 +648,9 @@ define <vscale x 2 x i64> @sqrdmlsh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2
 
 define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqrdmulh_i8:
-; CHECK: sqrdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
@@ -603,8 +658,9 @@ define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrdmulh_i16:
-; CHECK: sqrdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
@@ -612,8 +668,9 @@ define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16
 
 define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrdmulh_i32:
-; CHECK: sqrdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
@@ -621,8 +678,9 @@ define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32
 
 define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrdmulh_i64:
-; CHECK: sqrdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
@@ -634,8 +692,9 @@ define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
 
 define <vscale x 8 x i16> @sqrdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrdmulh_lane_i16:
-; CHECK: sqrdmulh z0.h, z0.h, z1.h[6]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h[6]
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                          <vscale x 8 x i16> %b,
                                                                          i32 6)
@@ -644,8 +703,9 @@ define <vscale x 8 x i16> @sqrdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8
 
 define <vscale x 4 x i32> @sqrdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrdmulh_lane_i32:
-; CHECK: sqrdmulh z0.s, z0.s, z1.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                          <vscale x 4 x i32> %b,
                                                                          i32 2);
@@ -654,8 +714,9 @@ define <vscale x 4 x i32> @sqrdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4
 
 define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrdmulh_lane_i64:
-; CHECK: sqrdmulh z0.d, z0.d, z1.d[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                          <vscale x 2 x i64> %b,
                                                                          i32 1)
@@ -668,8 +729,9 @@ define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2
 
 define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqrshl_i8:
-; CHECK: sqrshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -678,8 +740,9 @@ define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrshl_i16:
-; CHECK: sqrshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -688,8 +751,9 @@ define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrshl_i32:
-; CHECK: sqrshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -698,8 +762,9 @@ define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrshl_i64:
-; CHECK: sqrshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -712,9 +777,10 @@ define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqrshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -724,9 +790,10 @@ define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqrshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: sqrshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -736,9 +803,10 @@ define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqrshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: sqrshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -748,9 +816,10 @@ define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: sqrshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -760,9 +829,10 @@ define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqrshlr_i64_noptrue:
-; CHECK: sqrshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -775,8 +845,9 @@ define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x
 
 define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqshl_i8:
-; CHECK: sqshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -785,8 +856,9 @@ define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqshl_i16:
-; CHECK: sqshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -795,8 +867,9 @@ define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqshl_i32:
-; CHECK: sqshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -805,8 +878,9 @@ define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqshl_i64:
-; CHECK: sqshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -819,9 +893,10 @@ define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -831,9 +906,10 @@ define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: sqshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -843,9 +919,10 @@ define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: sqshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -855,9 +932,10 @@ define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: sqshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -867,9 +945,10 @@ define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqshlr_i64_noptrue:
-; CHECK: sqshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -882,8 +961,9 @@ define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqshl_n_i8:
-; CHECK: sqshl z0.b, p0/m, z0.b, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
   %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -893,8 +973,9 @@ define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqshl_n_i16:
-; CHECK: sqshl z0.h, p0/m, z0.h, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
   %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -904,8 +985,9 @@ define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshl_n_i32:
-; CHECK: sqshl z0.s, p0/m, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
   %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -915,8 +997,9 @@ define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshl_n_i64:
-; CHECK: sqshl z0.d, p0/m, z0.d, #63
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
   %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -926,9 +1009,10 @@ define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqshl_n_i8_range:
-; CHECK: mov z1.b, #8
-; CHECK: sqshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #8 // =0x8
+; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -938,9 +1022,10 @@ define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16
 
 define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqshl_n_i16_range:
-; CHECK: mov z1.h, #16
-; CHECK: sqshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #16 // =0x10
+; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -950,9 +1035,10 @@ define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8
 
 define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshl_n_i32_range:
-; CHECK: mov z1.s, #32
-; CHECK: sqshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #32 // =0x20
+; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -962,9 +1048,10 @@ define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshl_n_i64_range:
-; CHECK: mov z1.d, #64
-; CHECK: sqshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #64 // =0x40
+; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -978,8 +1065,9 @@ define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqshlu_i8:
-; CHECK: sqshlu z0.b, p0/m, z0.b, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   i32 2)
@@ -988,8 +1076,9 @@ define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqshlu_i16:
-; CHECK: sqshlu z0.h, p0/m, z0.h, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   i32 3)
@@ -998,8 +1087,9 @@ define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqshlu_i32:
-; CHECK: sqshlu z0.s, p0/m, z0.s, #29
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #29
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   i32 29)
@@ -1008,8 +1098,9 @@ define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqshlu_i64:
-; CHECK: sqshlu z0.d, p0/m, z0.d, #62
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #62
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   i32 62)
@@ -1022,8 +1113,9 @@ define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqsub_i8:
-; CHECK: sqsub z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1032,8 +1124,9 @@ define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqsub_i16:
-; CHECK: sqsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1042,8 +1135,9 @@ define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqsub_i32:
-; CHECK: sqsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1052,8 +1146,9 @@ define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqsub_i64:
-; CHECK: sqsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1066,8 +1161,9 @@ define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqsubr_i8:
-; CHECK: sqsubr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1076,8 +1172,9 @@ define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqsubr_i16:
-; CHECK: sqsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1086,8 +1183,9 @@ define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqsubr_i32:
-; CHECK: sqsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1096,8 +1194,9 @@ define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqsubr_i64:
-; CHECK: sqsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1110,8 +1209,9 @@ define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: srhadd_i8:
-; CHECK: srhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1120,8 +1220,9 @@ define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: srhadd_i16:
-; CHECK: srhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1130,8 +1231,9 @@ define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: srhadd_i32:
-; CHECK: srhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1140,8 +1242,9 @@ define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: srhadd_i64:
-; CHECK: srhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1154,8 +1257,9 @@ define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sri_i8:
-; CHECK: sri z0.b, z1.b, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sri z0.b, z1.b, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 1)
@@ -1164,8 +1268,9 @@ define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sri_i16:
-; CHECK: sri z0.h, z1.h, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sri z0.h, z1.h, #16
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 16)
@@ -1174,8 +1279,9 @@ define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 
 define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sri_i32:
-; CHECK: sri z0.s, z1.s, #32
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sri z0.s, z1.s, #32
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 32);
@@ -1184,8 +1290,9 @@ define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 
 define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sri_i64:
-; CHECK: sri z0.d, z1.d, #64
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sri z0.d, z1.d, #64
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 64)
@@ -1198,8 +1305,9 @@ define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 
 define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: srshl_i8:
-; CHECK: srshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1208,8 +1316,9 @@ define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: srshl_i16:
-; CHECK: srshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1218,8 +1327,9 @@ define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: srshl_i32:
-; CHECK: srshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1228,8 +1338,9 @@ define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: srshl_i64:
-; CHECK: srshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1242,9 +1353,10 @@ define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: srshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    srshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -1254,9 +1366,10 @@ define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: srshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: srshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    srshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -1266,9 +1379,10 @@ define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: srshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: srshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    srshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -1278,9 +1392,10 @@ define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: srshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: srshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    srshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -1290,9 +1405,10 @@ define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: srshlr_i64_noptrue:
-; CHECK: srshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -1305,8 +1421,9 @@ define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: srshr_i8:
-; CHECK: srshr z0.b, p0/m, z0.b, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  i32 8)
@@ -1315,8 +1432,9 @@ define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: srshr_i16:
-; CHECK: srshr z0.h, p0/m, z0.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshr z0.h, p0/m, z0.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  i32 1)
@@ -1325,8 +1443,9 @@ define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: srshr_i32:
-; CHECK: srshr z0.s, p0/m, z0.s, #22
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshr z0.s, p0/m, z0.s, #22
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  i32 22)
@@ -1335,8 +1454,9 @@ define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: srshr_i64:
-; CHECK: srshr z0.d, p0/m, z0.d, #54
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srshr z0.d, p0/m, z0.d, #54
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  i32 54)
@@ -1349,8 +1469,9 @@ define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: srsra_i8:
-; CHECK: srsra z0.b, z1.b, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.b, z1.b, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  i32 2)
@@ -1359,8 +1480,9 @@ define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: srsra_i16:
-; CHECK: srsra z0.h, z1.h, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.h, z1.h, #15
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 15)
@@ -1369,8 +1491,9 @@ define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: srsra_i32:
-; CHECK: srsra z0.s, z1.s, #12
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.s, z1.s, #12
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 12)
@@ -1379,8 +1502,9 @@ define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: srsra_i64:
-; CHECK: srsra z0.d, z1.d, #44
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.d, z1.d, #44
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 44)
@@ -1393,8 +1517,9 @@ define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssra_i8:
-; CHECK: ssra z0.b, z1.b, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.b, z1.b, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 3)
@@ -1403,8 +1528,9 @@ define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssra_i16:
-; CHECK: ssra z0.h, z1.h, #14
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.h, z1.h, #14
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 14)
@@ -1413,8 +1539,9 @@ define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssra_i32:
-; CHECK: ssra z0.s, z1.s, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.s, z1.s, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 2)
@@ -1423,8 +1550,9 @@ define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ssra_i64:
-; CHECK: ssra z0.d, z1.d, #34
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.d, z1.d, #34
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 34)
@@ -1437,8 +1565,9 @@ define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: suqadd_i8:
-; CHECK: suqadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    suqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1447,8 +1576,9 @@ define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: suqadd_i16:
-; CHECK: suqadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    suqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1457,8 +1587,9 @@ define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: suqadd_i32:
-; CHECK: suqadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    suqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1467,8 +1598,9 @@ define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: suqadd_i64:
-; CHECK: suqadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    suqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1481,8 +1613,9 @@ define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uaba_i8:
-; CHECK: uaba z0.b, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %c)
@@ -1491,8 +1624,9 @@ define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
 
 define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uaba_i16:
-; CHECK: uaba z0.h, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %c)
@@ -1501,8 +1635,9 @@ define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uaba_i32:
-; CHECK: uaba z0.s, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %c)
@@ -1511,8 +1646,9 @@ define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: uaba_i64:
-; CHECK: uaba z0.d, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %c)
@@ -1525,8 +1661,9 @@ define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 
 define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uhadd_i8:
-; CHECK: uhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1535,8 +1672,9 @@ define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uhadd_i16:
-; CHECK: uhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1545,8 +1683,9 @@ define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uhadd_i32:
-; CHECK: uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1555,8 +1694,9 @@ define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uhadd_i64:
-; CHECK: uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1569,8 +1709,9 @@ define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uhsub_i8:
-; CHECK: uhsub z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1579,8 +1720,9 @@ define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uhsub_i16:
-; CHECK: uhsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1589,8 +1731,9 @@ define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uhsub_i32:
-; CHECK: uhsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1599,8 +1742,9 @@ define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uhsub_i64:
-; CHECK: uhsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1613,8 +1757,9 @@ define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uhsubr_i8:
-; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1623,8 +1768,9 @@ define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uhsubr_i16:
-; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1633,8 +1779,9 @@ define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uhsubr_i32:
-; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1643,8 +1790,9 @@ define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uhsubr_i64:
-; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1657,8 +1805,9 @@ define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqadd_i8:
-; CHECK: uqadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1667,8 +1816,9 @@ define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqadd_i16:
-; CHECK: uqadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1677,8 +1827,9 @@ define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqadd_i32:
-; CHECK: uqadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1687,8 +1838,9 @@ define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqadd_i64:
-; CHECK: uqadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1701,8 +1853,9 @@ define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqrshl_i8:
-; CHECK: uqrshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -1711,8 +1864,9 @@ define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqrshl_i16:
-; CHECK: uqrshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -1721,8 +1875,9 @@ define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqrshl_i32:
-; CHECK: uqrshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -1731,8 +1886,9 @@ define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqrshl_i64:
-; CHECK: uqrshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -1745,9 +1901,10 @@ define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqrshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -1757,9 +1914,10 @@ define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqrshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: uqrshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    uqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -1769,9 +1927,10 @@ define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqrshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: uqrshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -1781,9 +1940,10 @@ define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqrshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: uqrshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -1793,9 +1953,10 @@ define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqrshlr_i64_noptrue:
-; CHECK: uqrshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqrshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -1808,8 +1969,9 @@ define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x
 
 define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqshl_i8:
-; CHECK: uqshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -1818,8 +1980,9 @@ define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqshl_i16:
-; CHECK: uqshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -1828,8 +1991,9 @@ define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqshl_i32:
-; CHECK: uqshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -1838,8 +2002,9 @@ define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqshl_i64:
-; CHECK: uqshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -1852,9 +2017,10 @@ define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -1864,9 +2030,10 @@ define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: uqshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    uqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -1876,9 +2043,10 @@ define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: uqshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -1888,9 +2056,10 @@ define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: uqshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -1900,9 +2069,10 @@ define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqshlr_i64_noptrue:
-; CHECK: uqshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -1915,8 +2085,9 @@ define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uqshl_n_i8:
-; CHECK: uqshl z0.b, p0/m, z0.b, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
   %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -1926,8 +2097,9 @@ define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqshl_n_i16:
-; CHECK: uqshl z0.h, p0/m, z0.h, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
   %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -1937,8 +2109,9 @@ define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqshl_n_i32:
-; CHECK: uqshl z0.s, p0/m, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
   %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -1948,8 +2121,9 @@ define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqshl_n_i64:
-; CHECK: uqshl z0.d, p0/m, z0.d, #63
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
   %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -1959,9 +2133,10 @@ define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uqshl_n_i8_range:
-; CHECK: mov z1.b, #8
-; CHECK: uqshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #8 // =0x8
+; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
@@ -1971,9 +2146,10 @@ define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16
 
 define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqshl_n_i16_range:
-; CHECK: mov z1.h, #16
-; CHECK: uqshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #16 // =0x10
+; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
@@ -1983,9 +2159,10 @@ define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8
 
 define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqshl_n_i32_range:
-; CHECK: mov z1.s, #32
-; CHECK: uqshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #32 // =0x20
+; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
@@ -1995,9 +2172,10 @@ define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4
 
 define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqshl_n_i64_range:
-; CHECK: mov z1.d, #64
-; CHECK: uqshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #64 // =0x40
+; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
@@ -2011,8 +2189,9 @@ define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqsub_i8:
-; CHECK: uqsub z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -2021,8 +2200,9 @@ define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqsub_i16:
-; CHECK: uqsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -2031,8 +2211,9 @@ define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsub_i32:
-; CHECK: uqsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -2041,8 +2222,9 @@ define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqsub_i64:
-; CHECK: uqsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -2055,8 +2237,9 @@ define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uqsubr_i8:
-; CHECK: uqsubr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -2065,8 +2248,9 @@ define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uqsubr_i16:
-; CHECK: uqsubr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -2075,8 +2259,9 @@ define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uqsubr_i32:
-; CHECK: uqsubr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -2085,8 +2270,9 @@ define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: uqsubr_i64:
-; CHECK: uqsubr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -2099,8 +2285,9 @@ define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: urecpe_i32:
-; CHECK: urecpe z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %b)
@@ -2113,8 +2300,9 @@ define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
 
 define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: urhadd_i8:
-; CHECK: urhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -2123,8 +2311,9 @@ define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: urhadd_i16:
-; CHECK: urhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -2133,8 +2322,9 @@ define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: urhadd_i32:
-; CHECK: urhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -2143,8 +2333,9 @@ define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: urhadd_i64:
-; CHECK: urhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                              <vscale x 2 x i64> %a,
                                                              <vscale x 2 x i64> %b)
@@ -2157,8 +2348,9 @@ define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: urshl_i8:
-; CHECK: urshl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -2167,8 +2359,9 @@ define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: urshl_i16:
-; CHECK: urshl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -2177,8 +2370,9 @@ define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: urshl_i32:
-; CHECK: urshl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -2187,8 +2381,9 @@ define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: urshl_i64:
-; CHECK: urshl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -2201,9 +2396,10 @@ define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: urshlr_i8:
-; CHECK: ptrue p0.b
-; CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    urshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
@@ -2213,9 +2409,10 @@ define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: urshlr_i16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: urshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    urshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
@@ -2225,9 +2422,10 @@ define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: urshlr_i32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: urshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    urshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
@@ -2237,9 +2435,10 @@ define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: urshlr_i64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: urshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    urshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
@@ -2249,9 +2448,10 @@ define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 
 define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: urshlr_i64_noptrue:
-; CHECK: urshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
@@ -2264,8 +2464,9 @@ define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: urshr_i8:
-; CHECK: urshr z0.b, p0/m, z0.b, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshr z0.b, p0/m, z0.b, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  i32 4)
@@ -2274,8 +2475,9 @@ define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
 
 define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: urshr_i16:
-; CHECK: urshr z0.h, p0/m, z0.h, #13
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshr z0.h, p0/m, z0.h, #13
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  i32 13)
@@ -2284,8 +2486,9 @@ define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: urshr_i32:
-; CHECK: urshr z0.s, p0/m, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshr z0.s, p0/m, z0.s, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  i32 1)
@@ -2294,8 +2497,9 @@ define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: urshr_i64:
-; CHECK: urshr z0.d, p0/m, z0.d, #24
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urshr z0.d, p0/m, z0.d, #24
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  i32 24)
@@ -2308,8 +2512,9 @@ define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 
 define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ursqrte_i32:
-; CHECK: ursqrte z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %b)
@@ -2322,8 +2527,9 @@ define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1>
 
 define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ursra_i8:
-; CHECK: ursra z0.b, z1.b, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.b, z1.b, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  i32 5)
@@ -2332,8 +2538,9 @@ define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ursra_i16:
-; CHECK: ursra z0.h, z1.h, #12
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.h, z1.h, #12
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 12)
@@ -2342,8 +2549,9 @@ define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ursra_i32:
-; CHECK: ursra z0.s, z1.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.s, z1.s, #31
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 31)
@@ -2352,8 +2560,9 @@ define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: ursra_i64:
-; CHECK: ursra z0.d, z1.d, #14
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.d, z1.d, #14
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 14)
@@ -2366,8 +2575,9 @@ define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 
 define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usqadd_i8:
-; CHECK: usqadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -2376,8 +2586,9 @@ define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 
 define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usqadd_i16:
-; CHECK: usqadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -2386,8 +2597,9 @@ define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 
 define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usqadd_i32:
-; CHECK: usqadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -2396,8 +2608,9 @@ define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: usqadd_i64:
-; CHECK: usqadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
@@ -2410,8 +2623,9 @@ define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usra_i8:
-; CHECK: usra z0.b, z1.b, #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.b, z1.b, #6
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 6)
@@ -2420,8 +2634,9 @@ define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 
 define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usra_i16:
-; CHECK: usra z0.h, z1.h, #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.h, z1.h, #11
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 11)
@@ -2430,8 +2645,9 @@ define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usra_i32:
-; CHECK: usra z0.s, z1.s, #21
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.s, z1.s, #21
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 21)
@@ -2440,8 +2656,9 @@ define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: usra_i64:
-; CHECK: usra z0.d, z1.d, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 4)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
index 97ac6035c4eb0..dac12aa97a469 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; HISTCNT
@@ -6,8 +7,9 @@
 
 define <vscale x 4 x i32> @histcnt_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: histcnt_i32:
-; CHECK: histcnt z0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    histcnt z0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.histcnt.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
@@ -16,8 +18,9 @@ define <vscale x 4 x i32> @histcnt_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @histcnt_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: histcnt_i64:
-; CHECK: histcnt z0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    histcnt z0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.histcnt.nxv2i64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
@@ -30,8 +33,9 @@ define <vscale x 2 x i64> @histcnt_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 16 x i8> @histseg(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: histseg:
-; CHECK: histseg z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    histseg z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.histseg.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
index ced16443cc92f..4d08ccfae8f1b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,64 +8,72 @@
 
 define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_b_ww:
-; CHECK: whilege p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_b_xx:
-; CHECK: whilege p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_h_ww:
-; CHECK: whilege p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_h_xx:
-; CHECK: whilege p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_s_ww:
-; CHECK: whilege p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_s_xx:
-; CHECK: whilege p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_d_ww:
-; CHECK: whilege p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_d_xx:
-; CHECK: whilege p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -75,64 +84,72 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_b_ww:
-; CHECK: whilehs p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_b_xx:
-; CHECK: whilehs p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_h_ww:
-; CHECK: whilehs p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_h_xx:
-; CHECK: whilehs p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_s_ww:
-; CHECK: whilehs p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_s_xx:
-; CHECK: whilehs p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_d_ww:
-; CHECK: whilehs p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_d_xx:
-; CHECK: whilehs p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -143,64 +160,72 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_b_ww:
-; CHECK: whilegt p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_b_xx:
-; CHECK: whilegt p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_h_ww:
-; CHECK: whilegt p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_h_xx:
-; CHECK: whilegt p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_s_ww:
-; CHECK: whilegt p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_s_xx:
-; CHECK: whilegt p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_d_ww:
-; CHECK: whilegt p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_d_xx:
-; CHECK: whilegt p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }
@@ -211,64 +236,72 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
 
 define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_b_ww:
-; CHECK: whilehi p0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 %a, i32 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_b_xx:
-; CHECK: whilehi p0.b, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 %a, i64 %b)
   ret <vscale x 16 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_h_ww:
-; CHECK: whilehi p0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 %a, i32 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_h_xx:
-; CHECK: whilehi p0.h, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 %a, i64 %b)
   ret <vscale x 8 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_s_ww:
-; CHECK: whilehi p0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 %a, i32 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_s_xx:
-; CHECK: whilehi p0.s, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 %a, i64 %b)
   ret <vscale x 4 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_d_ww:
-; CHECK: whilehi p0.d, w0, w1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, w0, w1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 %a, i32 %b)
   ret <vscale x 2 x i1> %out
 }
 
 define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_d_xx:
-; CHECK: whilehi p0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, x0, x1
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %a, i64 %b)
   ret <vscale x 2 x i1> %out
 }

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
index caa53fdc1d9eb..ee31ec99ca4ac 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x i16> @saddlbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: saddlbt_b:
-; CHECK: saddlbt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -16,8 +18,9 @@ define <vscale x 8 x i16> @saddlbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 4 x i32> @saddlbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: saddlbt_h:
-; CHECK: saddlbt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -25,8 +28,9 @@ define <vscale x 4 x i32> @saddlbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 2 x i64> @saddlbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: saddlbt_s:
-; CHECK: saddlbt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -38,8 +42,9 @@ define <vscale x 2 x i64> @saddlbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 8 x i16> @ssublbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssublbt_b:
-; CHECK: ssublbt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -47,8 +52,9 @@ define <vscale x 8 x i16> @ssublbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 4 x i32> @ssublbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssublbt_h:
-; CHECK: ssublbt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -56,8 +62,9 @@ define <vscale x 4 x i32> @ssublbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 2 x i64> @ssublbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssublbt_s:
-; CHECK: ssublbt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -69,8 +76,9 @@ define <vscale x 2 x i64> @ssublbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 
 define <vscale x 8 x i16> @ssubltb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssubltb_b:
-; CHECK: ssubltb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -78,8 +86,9 @@ define <vscale x 8 x i16> @ssubltb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
 
 define <vscale x 4 x i32> @ssubltb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssubltb_h:
-; CHECK: ssubltb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -87,8 +96,9 @@ define <vscale x 4 x i32> @ssubltb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 
 define <vscale x 2 x i64> @ssubltb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssubltb_s:
-; CHECK: ssubltb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubltb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
index f16a7dabea566..86ef2dd3bf1ef 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x i16> @sabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sabalb_b:
-; CHECK: sabalb z0.h, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalb z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c)
@@ -17,8 +19,9 @@ define <vscale x 8 x i16> @sabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @sabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sabalb_h:
-; CHECK: sabalb z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -27,8 +30,9 @@ define <vscale x 4 x i32> @sabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @sabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sabalb_s:
-; CHECK: sabalb z0.d, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalb z0.d, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -41,8 +45,9 @@ define <vscale x 2 x i64> @sabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @sabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sabalt_b:
-; CHECK: sabalt z0.h, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalt z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c)
@@ -51,8 +56,9 @@ define <vscale x 8 x i16> @sabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @sabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sabalt_h:
-; CHECK: sabalt z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -61,8 +67,9 @@ define <vscale x 4 x i32> @sabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @sabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sabalt_s:
-; CHECK: sabalt z0.d, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabalt z0.d, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -75,8 +82,9 @@ define <vscale x 2 x i64> @sabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @sabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sabdlb_b:
-; CHECK: sabdlb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -84,8 +92,9 @@ define <vscale x 8 x i16> @sabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @sabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sabdlb_h:
-; CHECK: sabdlb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -93,8 +102,9 @@ define <vscale x 4 x i32> @sabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @sabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sabdlb_s:
-; CHECK: sabdlb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -106,8 +116,9 @@ define <vscale x 2 x i64> @sabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @sabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sabdlt_b:
-; CHECK: sabdlt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -115,8 +126,9 @@ define <vscale x 8 x i16> @sabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @sabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sabdlt_h:
-; CHECK: sabdlt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -124,8 +136,9 @@ define <vscale x 4 x i32> @sabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @sabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sabdlt_s:
-; CHECK: sabdlt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdlt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -137,8 +150,9 @@ define <vscale x 2 x i64> @sabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @saddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: saddlb_b:
-; CHECK: saddlb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -146,8 +160,9 @@ define <vscale x 8 x i16> @saddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @saddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: saddlb_h:
-; CHECK: saddlb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -155,8 +170,9 @@ define <vscale x 4 x i32> @saddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @saddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: saddlb_s:
-; CHECK: saddlb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -168,8 +184,9 @@ define <vscale x 2 x i64> @saddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @saddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: saddlt_b:
-; CHECK: saddlt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -177,8 +194,9 @@ define <vscale x 8 x i16> @saddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @saddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: saddlt_h:
-; CHECK: saddlt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -186,8 +204,9 @@ define <vscale x 4 x i32> @saddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @saddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: saddlt_s:
-; CHECK: saddlt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddlt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -199,8 +218,9 @@ define <vscale x 2 x i64> @saddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: saddwb_b:
-; CHECK: saddwb z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwb z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -208,8 +228,9 @@ define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: saddwb_h:
-; CHECK: saddwb z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwb z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -217,8 +238,9 @@ define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: saddwb_s:
-; CHECK: saddwb z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwb z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -230,8 +252,9 @@ define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: saddwt_b:
-; CHECK: saddwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwt z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -239,8 +262,9 @@ define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: saddwt_h:
-; CHECK: saddwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwt z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -248,8 +272,9 @@ define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: saddwt_s:
-; CHECK: saddwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saddwt z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -262,8 +287,9 @@ define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @smullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smullb_b:
-; CHECK: smullb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -271,8 +297,9 @@ define <vscale x 8 x i16> @smullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @smullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smullb_h:
-; CHECK: smullb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -280,8 +307,9 @@ define <vscale x 4 x i32> @smullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @smullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smullb_s:
-; CHECK: smullb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -293,8 +321,9 @@ define <vscale x 2 x i64> @smullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smullb_lane_h:
-; CHECK: smullb z0.s, z0.h, z1.h[4]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullb z0.s, z0.h, z1.h[4]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        i32 4)
@@ -303,8 +332,9 @@ define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i1
 
 define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smullb_lane_s:
-; CHECK: smullb z0.d, z0.s, z1.s[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        i32 3)
@@ -317,8 +347,9 @@ define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i3
 
 define <vscale x 8 x i16> @smullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smullt_b:
-; CHECK: smullt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -326,8 +357,9 @@ define <vscale x 8 x i16> @smullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @smullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smullt_h:
-; CHECK: smullt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -335,8 +367,9 @@ define <vscale x 4 x i32> @smullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @smullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smullt_s:
-; CHECK: smullt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -348,8 +381,9 @@ define <vscale x 2 x i64> @smullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smullt_lane_h:
-; CHECK: smullt z0.s, z0.h, z1.h[5]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullt z0.s, z0.h, z1.h[5]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        i32 5)
@@ -358,8 +392,9 @@ define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i1
 
 define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smullt_lane_s:
-; CHECK: smullt z0.d, z0.s, z1.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        i32 2)
@@ -372,8 +407,9 @@ define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i3
 
 define <vscale x 8 x i16> @sqdmullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqdmullb_b:
-; CHECK: sqdmullb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -381,8 +417,9 @@ define <vscale x 8 x i16> @sqdmullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 4 x i32> @sqdmullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmullb_h:
-; CHECK: sqdmullb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -390,8 +427,9 @@ define <vscale x 4 x i32> @sqdmullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 2 x i64> @sqdmullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmullb_s:
-; CHECK: sqdmullb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -403,8 +441,9 @@ define <vscale x 2 x i64> @sqdmullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmullb_lane_h:
-; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullb z0.s, z0.h, z1.h[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                          <vscale x 8 x i16> %b,
                                                                          i32 2)
@@ -413,8 +452,9 @@ define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x
 
 define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmullb_lane_s:
-; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullb z0.d, z0.s, z1.s[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                          <vscale x 4 x i32> %b,
                                                                          i32 1)
@@ -427,8 +467,9 @@ define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x
 
 define <vscale x 8 x i16> @sqdmullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sqdmullt_b:
-; CHECK: sqdmullt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -436,8 +477,9 @@ define <vscale x 8 x i16> @sqdmullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 
 define <vscale x 4 x i32> @sqdmullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmullt_h:
-; CHECK: sqdmullt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -445,8 +487,9 @@ define <vscale x 4 x i32> @sqdmullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 
 define <vscale x 2 x i64> @sqdmullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmullt_s:
-; CHECK: sqdmullt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -458,8 +501,9 @@ define <vscale x 2 x i64> @sqdmullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 
 define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sqdmullt_lane_h:
-; CHECK: sqdmullt z0.s, z0.h, z1.h[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullt z0.s, z0.h, z1.h[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                          <vscale x 8 x i16> %b,
                                                                          i32 3)
@@ -468,8 +512,9 @@ define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x
 
 define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sqdmullt_lane_s:
-; CHECK: sqdmullt z0.d, z0.s, z1.s[0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmullt z0.d, z0.s, z1.s[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                          <vscale x 4 x i32> %b,
                                                                          i32 0)
@@ -482,8 +527,9 @@ define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x
 
 define <vscale x 8 x i16> @ssublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssublb_b:
-; CHECK: ssublb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -491,8 +537,9 @@ define <vscale x 8 x i16> @ssublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @ssublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssublb_h:
-; CHECK: ssublb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -500,8 +547,9 @@ define <vscale x 4 x i32> @ssublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @ssublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssublb_s:
-; CHECK: ssublb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -513,24 +561,27 @@ define <vscale x 2 x i64> @ssublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @sshllb_b(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sshllb_b:
-; CHECK: sshllb z0.h, z0.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllb z0.h, z0.b, #0
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8> %a, i32 0)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sshllb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sshllb_h:
-; CHECK: sshllb z0.s, z0.h, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllb z0.s, z0.h, #1
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sshllb_s:
-; CHECK: sshllb z0.d, z0.s, #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllb z0.d, z0.s, #2
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32> %a, i32 2)
   ret <vscale x 2 x i64> %out
 }
@@ -541,24 +592,27 @@ define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 8 x i16> @sshllt_b(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sshllt_b:
-; CHECK: sshllt z0.h, z0.b, #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllt z0.h, z0.b, #3
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8> %a, i32 3)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @sshllt_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sshllt_h:
-; CHECK: sshllt z0.s, z0.h, #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllt z0.s, z0.h, #4
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16> %a, i32 4)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sshllt_s:
-; CHECK: sshllt z0.d, z0.s, #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshllt z0.d, z0.s, #5
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32> %a, i32 5)
   ret <vscale x 2 x i64> %out
 }
@@ -569,8 +623,9 @@ define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 8 x i16> @ssublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssublt_b:
-; CHECK: ssublt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -578,8 +633,9 @@ define <vscale x 8 x i16> @ssublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @ssublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssublt_h:
-; CHECK: ssublt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -587,8 +643,9 @@ define <vscale x 4 x i32> @ssublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @ssublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssublt_s:
-; CHECK: ssublt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssublt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -600,8 +657,9 @@ define <vscale x 2 x i64> @ssublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssubwb_b:
-; CHECK: ssubwb z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwb z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -609,8 +667,9 @@ define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssubwb_h:
-; CHECK: ssubwb z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwb z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -618,8 +677,9 @@ define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssubwb_s:
-; CHECK: ssubwb z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwb z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -631,8 +691,9 @@ define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ssubwt_b:
-; CHECK: ssubwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwt z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -640,8 +701,9 @@ define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: ssubwt_h:
-; CHECK: ssubwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwt z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -649,8 +711,9 @@ define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: ssubwt_s:
-; CHECK: ssubwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssubwt z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -662,8 +725,9 @@ define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uabalb_b:
-; CHECK: uabalb z0.h, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalb z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c)
@@ -672,8 +736,9 @@ define <vscale x 8 x i16> @uabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uabalb_h:
-; CHECK: uabalb z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -682,8 +747,9 @@ define <vscale x 4 x i32> @uabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uabalb_s:
-; CHECK: uabalb z0.d, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalb z0.d, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -696,8 +762,9 @@ define <vscale x 2 x i64> @uabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uabalt_b:
-; CHECK: uabalt z0.h, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalt z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b,
                                                                   <vscale x 16 x i8> %c)
@@ -706,8 +773,9 @@ define <vscale x 8 x i16> @uabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uabalt_h:
-; CHECK: uabalt z0.s, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
@@ -716,8 +784,9 @@ define <vscale x 4 x i32> @uabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uabalt_s:
-; CHECK: uabalt z0.d, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabalt z0.d, z1.s, z2.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
@@ -730,8 +799,9 @@ define <vscale x 2 x i64> @uabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uabdlb_b:
-; CHECK: uabdlb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -739,8 +809,9 @@ define <vscale x 8 x i16> @uabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uabdlb_h:
-; CHECK: uabdlb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -748,8 +819,9 @@ define <vscale x 4 x i32> @uabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uabdlb_s:
-; CHECK: uabdlb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -761,8 +833,9 @@ define <vscale x 2 x i64> @uabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uabdlt_b:
-; CHECK: uabdlt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -770,8 +843,9 @@ define <vscale x 8 x i16> @uabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uabdlt_h:
-; CHECK: uabdlt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -779,8 +853,9 @@ define <vscale x 4 x i32> @uabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uabdlt_s:
-; CHECK: uabdlt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdlt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -792,8 +867,9 @@ define <vscale x 2 x i64> @uabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uaddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uaddlb_b:
-; CHECK: uaddlb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -801,8 +877,9 @@ define <vscale x 8 x i16> @uaddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uaddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uaddlb_h:
-; CHECK: uaddlb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -810,8 +887,9 @@ define <vscale x 4 x i32> @uaddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uaddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uaddlb_s:
-; CHECK: uaddlb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -823,8 +901,9 @@ define <vscale x 2 x i64> @uaddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uaddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uaddlt_b:
-; CHECK: uaddlt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -832,8 +911,9 @@ define <vscale x 8 x i16> @uaddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uaddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uaddlt_h:
-; CHECK: uaddlt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -841,8 +921,9 @@ define <vscale x 4 x i32> @uaddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uaddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uaddlt_s:
-; CHECK: uaddlt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddlt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -854,8 +935,9 @@ define <vscale x 2 x i64> @uaddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uaddwb_b:
-; CHECK: uaddwb z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwb z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -863,8 +945,9 @@ define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uaddwb_h:
-; CHECK: uaddwb z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwb z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -872,8 +955,9 @@ define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uaddwb_s:
-; CHECK: uaddwb z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwb z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -885,8 +969,9 @@ define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uaddwt_b:
-; CHECK: uaddwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwt z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -894,8 +979,9 @@ define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uaddwt_h:
-; CHECK: uaddwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwt z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -903,8 +989,9 @@ define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uaddwt_s:
-; CHECK: uaddwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaddwt z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -916,8 +1003,9 @@ define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @umullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umullb_b:
-; CHECK: umullb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -925,8 +1013,9 @@ define <vscale x 8 x i16> @umullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @umullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umullb_h:
-; CHECK: umullb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -934,8 +1023,9 @@ define <vscale x 4 x i32> @umullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @umullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umullb_s:
-; CHECK: umullb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -947,8 +1037,9 @@ define <vscale x 2 x i64> @umullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umullb_lane_h:
-; CHECK: umullb z0.s, z0.h, z1.h[0]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullb z0.s, z0.h, z1.h[0]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        i32 0)
@@ -958,8 +1049,9 @@ define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i1
 
 define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umullb_lane_s:
-; CHECK: umullb z0.d, z0.s, z1.s[3]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        i32 3)
@@ -972,8 +1064,9 @@ define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i3
 
 define <vscale x 8 x i16> @umullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umullt_b:
-; CHECK: umullt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -981,8 +1074,9 @@ define <vscale x 8 x i16> @umullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @umullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umullt_h:
-; CHECK: umullt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -990,8 +1084,9 @@ define <vscale x 4 x i32> @umullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @umullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umullt_s:
-; CHECK: umullt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -1003,8 +1098,9 @@ define <vscale x 2 x i64> @umullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umullt_lane_h:
-; CHECK: umullt z0.s, z0.h, z1.h[1]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullt z0.s, z0.h, z1.h[1]
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        i32 1)
@@ -1013,8 +1109,9 @@ define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i1
 
 define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umullt_lane_s:
-; CHECK: umullt z0.d, z0.s, z1.s[2]
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        i32 2)
@@ -1027,24 +1124,27 @@ define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i3
 
 define <vscale x 8 x i16> @ushllb_b(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: ushllb_b:
-; CHECK: ushllb z0.h, z0.b, #6
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllb z0.h, z0.b, #6
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8> %a, i32 6)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @ushllb_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ushllb_h:
-; CHECK: ushllb z0.s, z0.h, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllb z0.s, z0.h, #7
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16> %a, i32 7)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ushllb_s:
-; CHECK: ushllb z0.d, z0.s, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllb z0.d, z0.s, #8
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32> %a, i32 8)
   ret <vscale x 2 x i64> %out
 }
@@ -1055,24 +1155,27 @@ define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 8 x i16> @ushllt_b(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: ushllt_b:
-; CHECK: ushllt z0.h, z0.b, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllt z0.h, z0.b, #7
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8> %a, i32 7)
   ret <vscale x 8 x i16> %out
 }
 
 define <vscale x 4 x i32> @ushllt_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ushllt_h:
-; CHECK: ushllt z0.s, z0.h, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllt z0.s, z0.h, #15
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16> %a, i32 15)
   ret <vscale x 4 x i32> %out
 }
 
 define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ushllt_s:
-; CHECK: ushllt z0.d, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushllt z0.d, z0.s, #31
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32> %a, i32 31)
   ret <vscale x 2 x i64> %out
 }
@@ -1083,8 +1186,9 @@ define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) {
 
 define <vscale x 8 x i16> @usublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usublb_b:
-; CHECK: usublb z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublb z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -1092,8 +1196,9 @@ define <vscale x 8 x i16> @usublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @usublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usublb_h:
-; CHECK: usublb z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublb z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -1101,8 +1206,9 @@ define <vscale x 4 x i32> @usublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @usublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usublb_s:
-; CHECK: usublb z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublb z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -1114,8 +1220,9 @@ define <vscale x 2 x i64> @usublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @usublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usublt_b:
-; CHECK: usublt z0.h, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublt z0.h, z0.b, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -1123,8 +1230,9 @@ define <vscale x 8 x i16> @usublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @usublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usublt_h:
-; CHECK: usublt z0.s, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublt z0.s, z0.h, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -1132,8 +1240,9 @@ define <vscale x 4 x i32> @usublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @usublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usublt_s:
-; CHECK: usublt z0.d, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usublt z0.d, z0.s, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -1145,8 +1254,9 @@ define <vscale x 2 x i64> @usublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usubwb_b:
-; CHECK: usubwb z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwb z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -1154,8 +1264,9 @@ define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usubwb_h:
-; CHECK: usubwb z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwb z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -1163,8 +1274,9 @@ define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usubwb_s:
-; CHECK: usubwb z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwb z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out
@@ -1176,8 +1288,9 @@ define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b
 
 define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: usubwt_b:
-; CHECK: usubwt z0.h, z0.h, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwt z0.h, z0.h, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 8 x i16> %out
@@ -1185,8 +1298,9 @@ define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b
 
 define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: usubwt_h:
-; CHECK: usubwt z0.s, z0.s, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwt z0.s, z0.s, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 4 x i32> %out
@@ -1194,8 +1308,9 @@ define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b
 
 define <vscale x 2 x i64> @usubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: usubwt_s:
-; CHECK: usubwt z0.d, z0.d, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usubwt z0.d, z0.d, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 2 x i64> %out

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
index 5edf240bbee6b..c83ab43bbe693 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
@@ -7,8 +8,9 @@
 
 define <vscale x 8 x i16> @sadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sadalp_i8:
-; CHECK: sadalp z0.h, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sadalp z0.h, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -17,8 +19,9 @@ define <vscale x 8 x i16> @sadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @sadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sadalp_i16:
-; CHECK: sadalp z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sadalp z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -27,8 +30,9 @@ define <vscale x 4 x i32> @sadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @sadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sadalp_i32:
-; CHECK: sadalp z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sadalp z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)
@@ -41,8 +45,9 @@ define <vscale x 2 x i64> @sadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 
 define <vscale x 8 x i16> @uadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: uadalp_i8:
-; CHECK: uadalp z0.h, p0/m, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uadalp z0.h, p0/m, z1.b
+; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 16 x i8> %b)
@@ -51,8 +56,9 @@ define <vscale x 8 x i16> @uadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
 
 define <vscale x 4 x i32> @uadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: uadalp_i16:
-; CHECK: uadalp z0.s, p0/m, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uadalp z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 8 x i16> %b)
@@ -61,8 +67,9 @@ define <vscale x 4 x i32> @uadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 
 define <vscale x 2 x i64> @uadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: uadalp_i32:
-; CHECK: uadalp z0.d, p0/m, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uadalp z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 4 x i32> %b)

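For reference, assertions in this form are produced by the utils/update_llc_test_checks.py script named in the "; NOTE:" lines added above. A minimal sketch of the invocation, run from an llvm-project checkout, is shown below; the --llc-binary path is an assumption and should point at a locally built llc:

  # Regenerate the autogenerated CHECK lines for one of the modified tests.
  # ./build/bin/llc is a placeholder for your local llc build directory.
  llvm/utils/update_llc_test_checks.py \
      --llc-binary ./build/bin/llc \
      llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll

Re-running the same command after a codegen change refreshes every CHECK/CHECK-NEXT block in the file, which is what keeps these tests maintainable as an NFC update.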
