[clang] [Clang][AArch64] Add missing builtins for __ARM_FEATURE_SME2p1. (PR #147362)

Wed Jul 9 03:20:08 PDT 2025

https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/147362

>From d0ba55fb5993b1fe1a1cc161c222e96d495081d3 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Mon, 7 Jul 2025 17:43:11 +0100
Subject: [PATCH 1/2] Update tests to be more amenable for testing SME.

Add macro to allow the testing of streaming mode.
Split sve2p1 load/store tests to extract the struct variants that are
available to sme2p1 from the gather/scatter variants that are not.
---
 .../AArch64/sve2-intrinsics/acle_sve2_rax1.c  |   12 +-
 .../sve2p1-intrinsics/acle_sve2p1_dupq.c      |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_extq.c      |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_fp_reduce.c |   38 +-
 .../acle_sve2p1_int_reduce.c                  |  103 +-
 .../acle_sve2p1_load_struct.c                 | 1608 ++++++++++++++++
 .../sve2p1-intrinsics/acle_sve2p1_loads.c     | 1584 ----------------
 .../acle_sve2p1_pmov_to_pred.c                |   40 +-
 .../acle_sve2p1_pmov_to_vector.c              |   36 +-
 .../sve2p1-intrinsics/acle_sve2p1_store.c     | 1603 ----------------
 .../acle_sve2p1_store_struct.c                | 1626 +++++++++++++++++
 .../sve2p1-intrinsics/acle_sve2p1_tblq.c      |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_tbxq.c      |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_uzpq1.c     |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_uzpq2.c     |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_zipq1.c     |   34 +-
 .../sve2p1-intrinsics/acle_sve2p1_zipq2.c     |   34 +-
 17 files changed, 3536 insertions(+), 3386 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
 create mode 100644 clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c

diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
index 18cc3033e49f5..82a4d2a8f44a0 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
@@ -15,6 +15,14 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: @test_svrax1_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2) ATTR
 {
   return SVE_ACLE_FUNC(svrax1,_s64,,)(op1, op2);
 }
@@ -40,7 +48,7 @@ svint64_t test_svrax1_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svrax1_u64(svuint64_t op1, svuint64_t op2) ATTR
 {
   return SVE_ACLE_FUNC(svrax1,_u64,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
index cf1c00cdd56b2..bde1797235162 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_laneq_s8(svint8_t zn) {
+svint8_t test_svdup_laneq_s8(svint8_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
 }
 
@@ -48,7 +56,7 @@ svint8_t test_svdup_laneq_s8(svint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
+svuint8_t test_svdup_laneq_u8(svuint8_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
 }
 
@@ -64,7 +72,7 @@ svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svdup_laneq_s16(svint16_t zn) {
+svint16_t test_svdup_laneq_s16(svint16_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
 }
 
@@ -80,7 +88,7 @@ svint16_t test_svdup_laneq_s16(svint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
+svuint16_t test_svdup_laneq_u16(svuint16_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
 }
 
@@ -96,7 +104,7 @@ svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdup_laneq_s32(svint32_t zn) {
+svint32_t test_svdup_laneq_s32(svint32_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
 }
 
@@ -112,7 +120,7 @@ svint32_t test_svdup_laneq_s32(svint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
+svuint32_t test_svdup_laneq_u32(svuint32_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
 }
 
@@ -128,7 +136,7 @@ svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdup_laneq_s64(svint64_t zn) {
+svint64_t test_svdup_laneq_s64(svint64_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
 }
 
@@ -144,7 +152,7 @@ svint64_t test_svdup_laneq_s64(svint64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
+svuint64_t test_svdup_laneq_u64(svuint64_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
 }
 
@@ -160,7 +168,7 @@ svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
+svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
 }
 
@@ -176,7 +184,7 @@ svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
+svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
 }
 
@@ -192,7 +200,7 @@ svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
+svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
 }
 
@@ -208,7 +216,7 @@ svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
+svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
 }
 
@@ -224,6 +232,6 @@ svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) {
+svmfloat8_t test_svdup_laneq_mf8(svmfloat8_t zn) ATTR {
     return SVE_ACLE_FUNC(svdup_laneq, _mf8)(zn, 1);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
index d46e67b9918aa..64058ef08ff97 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svextq_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svextq_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svextq_u8(svuint8_t zn, svuint8_t zm) ATTR {
   return SVE_ACLE_FUNC(svextq, _u8,,)(zn, zm, 0);
 }
 
@@ -48,7 +56,7 @@ svuint8_t test_svextq_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 4)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svextq_s8(svint8_t zn, svint8_t zm) {
+svint8_t test_svextq_s8(svint8_t zn, svint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _s8,,)(zn, zm, 4);
 }
 
@@ -64,7 +72,7 @@ svint8_t test_svextq_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svextq_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svextq_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svextq, _u16,,)(zn, zm, 1);
 }
 
@@ -80,7 +88,7 @@ svuint16_t test_svextq_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]], i32 5)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svextq_s16(svint16_t zn, svint16_t zm) {
+svint16_t test_svextq_s16(svint16_t zn, svint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _s16,,)(zn, zm, 5);
 }
 
@@ -96,7 +104,7 @@ svint16_t test_svextq_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svextq, _u32,,)(zn, zm, 2);
 }
 
@@ -112,7 +120,7 @@ svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) {
+svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 3);
 }
 
@@ -128,7 +136,7 @@ svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) ATTR {
   return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 1);
 }
 
@@ -144,7 +152,7 @@ svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) {
+svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 0);
 }
 
@@ -160,7 +168,7 @@ svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) {
+svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 7);
 }
 
@@ -176,7 +184,7 @@ svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) {
+svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 2);
 }
 
@@ -192,7 +200,7 @@ svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) {
+svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 0);
 }
 
@@ -208,7 +216,7 @@ svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 6);
 }
 
@@ -224,6 +232,6 @@ svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 6)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svextq_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+svmfloat8_t test_svextq_mf8(svmfloat8_t zn, svmfloat8_t zm) ATTR {
     return SVE_ACLE_FUNC(svextq, _mf8,,)(zn, zm, 6);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
index 9aaf4c7aed2ff..3544bde378e2f 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
@@ -15,6 +15,14 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // FADDQV
 
 // CHECK-LABEL: @test_svaddqv_f16(
@@ -29,7 +37,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.faddqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
 //
-float16x8_t test_svaddqv_f16(svbool_t pg, svfloat16_t op)
+float16x8_t test_svaddqv_f16(svbool_t pg, svfloat16_t op) ATTR
 {
   return SVE_ACLE_FUNC(svaddqv,,_f16,)(pg, op);
 }
@@ -46,7 +54,7 @@ float16x8_t test_svaddqv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.faddqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
 //
-float32x4_t test_svaddqv_f32(svbool_t pg, svfloat32_t op)
+float32x4_t test_svaddqv_f32(svbool_t pg, svfloat32_t op) ATTR
 {
   return SVE_ACLE_FUNC(svaddqv,,_f32,)(pg, op);
 }
@@ -63,7 +71,7 @@ float32x4_t test_svaddqv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.faddqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
 //
-float64x2_t test_svaddqv_f64(svbool_t pg, svfloat64_t op)
+float64x2_t test_svaddqv_f64(svbool_t pg, svfloat64_t op) ATTR
 {
   return SVE_ACLE_FUNC(svaddqv,,_f64,)(pg, op);
 }
@@ -83,7 +91,7 @@ float64x2_t test_svaddqv_f64(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
 //
-float16x8_t test_svmaxqv_f16(svbool_t pg, svfloat16_t op)
+float16x8_t test_svmaxqv_f16(svbool_t pg, svfloat16_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxqv,,_f16,)(pg, op);
 }
@@ -100,7 +108,7 @@ float16x8_t test_svmaxqv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
 //
-float32x4_t test_svmaxqv_f32(svbool_t pg, svfloat32_t op)
+float32x4_t test_svmaxqv_f32(svbool_t pg, svfloat32_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxqv,,_f32,)(pg, op);
 }
@@ -117,7 +125,7 @@ float32x4_t test_svmaxqv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
 //
-float64x2_t test_svmaxqv_f64(svbool_t pg, svfloat64_t op)
+float64x2_t test_svmaxqv_f64(svbool_t pg, svfloat64_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxqv,,_f64,)(pg, op);
 }
@@ -137,7 +145,7 @@ float64x2_t test_svmaxqv_f64(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
 //
-float16x8_t test_svminqv_f16(svbool_t pg, svfloat16_t op)
+float16x8_t test_svminqv_f16(svbool_t pg, svfloat16_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminqv,,_f16,)(pg, op);
 }
@@ -154,7 +162,7 @@ float16x8_t test_svminqv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
 //
-float32x4_t test_svminqv_f32(svbool_t pg, svfloat32_t op)
+float32x4_t test_svminqv_f32(svbool_t pg, svfloat32_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminqv,,_f32,)(pg, op);
 }
@@ -171,7 +179,7 @@ float32x4_t test_svminqv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
 //
-float64x2_t test_svminqv_f64(svbool_t pg, svfloat64_t op)
+float64x2_t test_svminqv_f64(svbool_t pg, svfloat64_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminqv,,_f64,)(pg, op);
 }
@@ -191,7 +199,7 @@ float64x2_t test_svminqv_f64(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
 //
-float16x8_t test_svmaxnmqv_f16(svbool_t pg, svfloat16_t op)
+float16x8_t test_svmaxnmqv_f16(svbool_t pg, svfloat16_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmqv,,_f16,)(pg, op);
 }
@@ -208,7 +216,7 @@ float16x8_t test_svmaxnmqv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
 //
-float32x4_t test_svmaxnmqv_f32(svbool_t pg, svfloat32_t op)
+float32x4_t test_svmaxnmqv_f32(svbool_t pg, svfloat32_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmqv,,_f32,)(pg, op);
 }
@@ -225,7 +233,7 @@ float32x4_t test_svmaxnmqv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
 //
-float64x2_t test_svmaxnmqv_f64(svbool_t pg, svfloat64_t op)
+float64x2_t test_svmaxnmqv_f64(svbool_t pg, svfloat64_t op) ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmqv,,_f64,)(pg, op);
 }
@@ -245,7 +253,7 @@ float64x2_t test_svmaxnmqv_f64(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
 //
-float16x8_t test_svminnmqv_f16(svbool_t pg, svfloat16_t op)
+float16x8_t test_svminnmqv_f16(svbool_t pg, svfloat16_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminnmqv,,_f16,)(pg, op);
 }
@@ -262,7 +270,7 @@ float16x8_t test_svminnmqv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
 //
-float32x4_t test_svminnmqv_f32(svbool_t pg, svfloat32_t op)
+float32x4_t test_svminnmqv_f32(svbool_t pg, svfloat32_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminnmqv,,_f32,)(pg, op);
 }
@@ -279,7 +287,7 @@ float32x4_t test_svminnmqv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
 //
-float64x2_t test_svminnmqv_f64(svbool_t pg, svfloat64_t op)
+float64x2_t test_svminnmqv_f64(svbool_t pg, svfloat64_t op) ATTR
 {
   return SVE_ACLE_FUNC(svminnmqv,,_f64,)(pg, op);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
index 9e2a12520ea9d..4aea5890826bb 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
@@ -14,6 +14,13 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
 
 // ADDQV
 
@@ -27,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_svaddqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_svaddqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_s8,,)(pg, op1);
 }
 
@@ -43,7 +50,7 @@ int8x16_t test_svaddqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_svaddqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_svaddqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_s16,,)(pg, op1);
 }
 
@@ -59,7 +66,7 @@ int16x8_t test_svaddqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_svaddqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_svaddqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_s32,,)(pg, op1);
 }
 
@@ -75,7 +82,7 @@ int32x4_t test_svaddqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_svaddqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_svaddqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_s64,,)(pg, op1);
 }
 
@@ -89,7 +96,7 @@ int64x2_t test_svaddqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_svaddqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_svaddqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_u8,,)(pg, op1);
 }
 
@@ -105,7 +112,7 @@ uint8x16_t test_svaddqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_svaddqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_svaddqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_u16,,)(pg, op1);
 }
 
@@ -121,7 +128,7 @@ uint16x8_t test_svaddqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_svaddqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_svaddqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_u32,,)(pg, op1);
 }
 
@@ -137,7 +144,7 @@ uint32x4_t test_svaddqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_svaddqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_svaddqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svaddqv,_u64,,)(pg, op1);
 }
 
@@ -154,7 +161,7 @@ uint64x2_t test_svaddqv_u64(svbool_t pg, svuint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_svandqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_svandqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_s8,,)(pg, op1);
 }
 
@@ -170,7 +177,7 @@ int8x16_t test_svandqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_svandqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_svandqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_s16,,)(pg, op1);
 }
 
@@ -186,7 +193,7 @@ int16x8_t test_svandqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_svandqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_svandqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_s32,,)(pg, op1);
 }
 
@@ -202,7 +209,7 @@ int32x4_t test_svandqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_svandqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_svandqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_s64,,)(pg, op1);
 }
 
@@ -216,7 +223,7 @@ int64x2_t test_svandqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_svandqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_svandqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_u8,,)(pg, op1);
 }
 
@@ -232,7 +239,7 @@ uint8x16_t test_svandqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_svandqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_svandqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_u16,,)(pg, op1);
 }
 
@@ -248,7 +255,7 @@ uint16x8_t test_svandqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_svandqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_svandqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_u32,,)(pg, op1);
 }
 
@@ -264,7 +271,7 @@ uint32x4_t test_svandqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_svandqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_svandqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svandqv,_u64,,)(pg, op1);
 }
 
@@ -281,7 +288,7 @@ uint64x2_t test_svandqv_u64(svbool_t pg, svuint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_sveorqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_sveorqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_s8,,)(pg, op1);
 }
 
@@ -297,7 +304,7 @@ int8x16_t test_sveorqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_sveorqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_sveorqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_s16,,)(pg, op1);
 }
 
@@ -313,7 +320,7 @@ int16x8_t test_sveorqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_sveorqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_sveorqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_s32,,)(pg, op1);
 }
 
@@ -329,7 +336,7 @@ int32x4_t test_sveorqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_sveorqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_sveorqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_s64,,)(pg, op1);
 }
 
@@ -343,7 +350,7 @@ int64x2_t test_sveorqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_sveorqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_sveorqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_u8,,)(pg, op1);
 }
 
@@ -359,7 +366,7 @@ uint8x16_t test_sveorqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_sveorqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_sveorqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_u16,,)(pg, op1);
 }
 
@@ -375,7 +382,7 @@ uint16x8_t test_sveorqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_sveorqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_sveorqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_u32,,)(pg, op1);
 }
 
@@ -391,7 +398,7 @@ uint32x4_t test_sveorqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_sveorqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_sveorqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(sveorqv,_u64,,)(pg, op1);
 }
 
@@ -408,7 +415,7 @@ uint64x2_t test_sveorqv_u64(svbool_t pg, svuint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_svorqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_svorqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_s8,,)(pg, op1);
 }
 
@@ -424,7 +431,7 @@ int8x16_t test_svorqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_svorqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_svorqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_s16,,)(pg, op1);
 }
 
@@ -440,7 +447,7 @@ int16x8_t test_svorqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_svorqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_svorqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_s32,,)(pg, op1);
 }
 
@@ -456,7 +463,7 @@ int32x4_t test_svorqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_svorqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_svorqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_s64,,)(pg, op1);
 }
 
@@ -470,7 +477,7 @@ int64x2_t test_svorqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_svorqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_svorqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_u8,,)(pg, op1);
 }
 
@@ -486,7 +493,7 @@ uint8x16_t test_svorqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_svorqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_svorqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_u16,,)(pg, op1);
 }
 
@@ -502,7 +509,7 @@ uint16x8_t test_svorqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_svorqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_svorqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_u32,,)(pg, op1);
 }
 
@@ -518,7 +525,7 @@ uint32x4_t test_svorqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_svorqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_svorqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svorqv,_u64,,)(pg, op1);
 }
 
@@ -535,7 +542,7 @@ uint64x2_t test_svorqv_u64(svbool_t pg, svuint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.smaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_svmaxqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_svmaxqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_s8,,)(pg, op1);
 }
 
@@ -551,7 +558,7 @@ int8x16_t test_svmaxqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_svmaxqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_svmaxqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_s16,,)(pg, op1);
 }
 
@@ -567,7 +574,7 @@ int16x8_t test_svmaxqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.smaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_svmaxqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_svmaxqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_s32,,)(pg, op1);
 }
 
@@ -583,7 +590,7 @@ int32x4_t test_svmaxqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.smaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_svmaxqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_svmaxqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_s64,,)(pg, op1);
 }
 
@@ -600,7 +607,7 @@ int64x2_t test_svmaxqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.umaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_svmaxqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_svmaxqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_u8,,)(pg, op1);
 }
 
@@ -616,7 +623,7 @@ uint8x16_t test_svmaxqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_svmaxqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_svmaxqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_u16,,)(pg, op1);
 }
 
@@ -632,7 +639,7 @@ uint16x8_t test_svmaxqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.umaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_svmaxqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_svmaxqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_u32,,)(pg, op1);
 }
 
@@ -648,7 +655,7 @@ uint32x4_t test_svmaxqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.umaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_svmaxqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_svmaxqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svmaxqv,_u64,,)(pg, op1);
 }
 
@@ -665,7 +672,7 @@ uint64x2_t test_svmaxqv_u64(svbool_t pg, svuint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.sminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-int8x16_t test_svminqv_s8(svbool_t pg, svint8_t op1) {
+int8x16_t test_svminqv_s8(svbool_t pg, svint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_s8,,)(pg, op1);
 }
 
@@ -681,7 +688,7 @@ int8x16_t test_svminqv_s8(svbool_t pg, svint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-int16x8_t test_svminqv_s16(svbool_t pg, svint16_t op1) {
+int16x8_t test_svminqv_s16(svbool_t pg, svint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_s16,,)(pg, op1);
 }
 
@@ -697,7 +704,7 @@ int16x8_t test_svminqv_s16(svbool_t pg, svint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.sminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-int32x4_t test_svminqv_s32(svbool_t pg, svint32_t op1) {
+int32x4_t test_svminqv_s32(svbool_t pg, svint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_s32,,)(pg, op1);
 }
 
@@ -713,7 +720,7 @@ int32x4_t test_svminqv_s32(svbool_t pg, svint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.sminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-int64x2_t test_svminqv_s64(svbool_t pg, svint64_t op1) {
+int64x2_t test_svminqv_s64(svbool_t pg, svint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_s64,,)(pg, op1);
 }
 
@@ -730,7 +737,7 @@ int64x2_t test_svminqv_s64(svbool_t pg, svint64_t op1) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.uminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
-uint8x16_t test_svminqv_u8(svbool_t pg, svuint8_t op1) {
+uint8x16_t test_svminqv_u8(svbool_t pg, svuint8_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_u8,,)(pg, op1);
 }
 
@@ -746,7 +753,7 @@ uint8x16_t test_svminqv_u8(svbool_t pg, svuint8_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
-uint16x8_t test_svminqv_u16(svbool_t pg, svuint16_t op1) {
+uint16x8_t test_svminqv_u16(svbool_t pg, svuint16_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_u16,,)(pg, op1);
 }
 
@@ -762,7 +769,7 @@ uint16x8_t test_svminqv_u16(svbool_t pg, svuint16_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.uminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
-uint32x4_t test_svminqv_u32(svbool_t pg, svuint32_t op1) {
+uint32x4_t test_svminqv_u32(svbool_t pg, svuint32_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_u32,,)(pg, op1);
 }
 
@@ -778,6 +785,6 @@ uint32x4_t test_svminqv_u32(svbool_t pg, svuint32_t op1) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.uminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 //
-uint64x2_t test_svminqv_u64(svbool_t pg, svuint64_t op1) {
+uint64x2_t test_svminqv_u64(svbool_t pg, svuint64_t op1) ATTR {
   return SVE_ACLE_FUNC(svminqv,_u64,,)(pg, op1);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
new file mode 100644
index 0000000000000..8c41b5d91cc53
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
@@ -0,0 +1,1608 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+// CHECK-LABEL: @test_svld2q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld2q_u8u10__SVBool_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svld2q_u8(svbool_t pg, const uint8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_u8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld2q_s8u10__SVBool_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svint8x2_t test_svld2q_s8(svbool_t pg, const int8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_s8,)(pg, base);
+}
+// CHECK-LABEL: @test_svld2q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_u16u10__SVBool_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svuint16x2_t test_svld2q_u16(svbool_t pg, const uint16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_u16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_s16u10__SVBool_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svint16x2_t test_svld2q_s16(svbool_t pg, const int16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_s16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_u32u10__SVBool_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svuint32x2_t test_svld2q_u32(svbool_t pg, const uint32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_u32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_s32u10__SVBool_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svint32x2_t test_svld2q_s32(svbool_t pg, const int32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_s32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_u64u10__SVBool_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svuint64x2_t test_svld2q_u64(svbool_t pg, const uint64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_u64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_s64u10__SVBool_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svint64x2_t test_svld2q_s64(svbool_t pg, const int64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_s64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_f16u10__SVBool_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+svfloat16x2_t test_svld2q_f16(svbool_t pg, const float16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_f16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld2q_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+svbfloat16x2_t test_svld2q_bf16(svbool_t pg, const bfloat16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_bf16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_f32u10__SVBool_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+svfloat32x2_t test_svld2q_f32(svbool_t pg, const float32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_f32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_f64u10__SVBool_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld2q_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svmfloat8x2_t test_svld2q_mf8(svbool_t pg, const mfloat8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q,,_mf8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum_,,u8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_s8,)(pg, base, vnum);
+}
+// CHECK-LABEL: @test_svld2q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_u16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_s16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_u32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_s32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_u64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_s64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_f16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld2q_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_bf16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_f32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld2q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svmfloat8x2_t test_svld2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld2q_vnum,,_mf8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld3q_u8u10__SVBool_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x3_t test_svld3q_u8(svbool_t pg, const uint8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_u8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld3q_s8u10__SVBool_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svint8x3_t test_svld3q_s8(svbool_t pg, const int8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_s8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_u16u10__SVBool_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svuint16x3_t test_svld3q_u16(svbool_t pg, const uint16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_u16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_s16u10__SVBool_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svint16x3_t test_svld3q_s16(svbool_t pg, const int16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_s16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_u32u10__SVBool_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svuint32x3_t test_svld3q_u32(svbool_t pg, const uint32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_u32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_s32u10__SVBool_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svint32x3_t test_svld3q_s32(svbool_t pg, const int32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_s32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_u64u10__SVBool_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svuint64x3_t test_svld3q_u64(svbool_t pg, const uint64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_u64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_s64u10__SVBool_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svint64x3_t test_svld3q_s64(svbool_t pg, const int64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_s64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_f16u10__SVBool_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+svfloat16x3_t test_svld3q_f16(svbool_t pg, const float16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_f16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld3q_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+svbfloat16x3_t test_svld3q_bf16(svbool_t pg, const bfloat16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_bf16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_f32u10__SVBool_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+svfloat32x3_t test_svld3q_f32(svbool_t pg, const float32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_f32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_f64u10__SVBool_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld3q_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svmfloat8x3_t test_svld3q_mf8(svbool_t pg, const mfloat8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q,,_mf8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum_,,u8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_s8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_u16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_s16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_u32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_s32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_u64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_s64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_f16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld3q_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_bf16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_f32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld3q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svmfloat8x3_t test_svld3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld3q_vnum,,_mf8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svld4q_u8u10__SVBool_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svld4q_u8(svbool_t pg, const uint8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_u8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_u16u10__SVBool_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svuint16x4_t test_svld4q_u16(svbool_t pg, const uint16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_u16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_s16u10__SVBool_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
+//
+svint16x4_t test_svld4q_s16(svbool_t pg, const int16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_s16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_u32u10__SVBool_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svuint32x4_t test_svld4q_u32(svbool_t pg, const uint32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_u32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_s32u10__SVBool_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
+//
+svint32x4_t test_svld4q_s32(svbool_t pg, const int32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_s32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_u64u10__SVBool_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svuint64x4_t test_svld4q_u64(svbool_t pg, const uint64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_u64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_s64u10__SVBool_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
+//
+svint64x4_t test_svld4q_s64(svbool_t pg, const int64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_s64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_f16u10__SVBool_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
+//
+svfloat16x4_t test_svld4q_f16(svbool_t pg, const float16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_f16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld4q_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
+//
+svbfloat16x4_t test_svld4q_bf16(svbool_t pg, const bfloat16_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_bf16,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_f32u10__SVBool_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
+//
+svfloat32x4_t test_svld4q_f32(svbool_t pg, const float32_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_f32,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_f64u10__SVBool_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
+//
+svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svld4q_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svmfloat8x4_t test_svld4q_mf8(svbool_t pg, const mfloat8_t *base) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q,,_mf8,)(pg, base);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum_,,u8,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_s8,)(pg, base, vnum);
+}
+// CHECK-LABEL: @test_svld4q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_u16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
+//
+svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_s16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_u32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
+//
+svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_s32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_u64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
+//
+svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_s64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
+//
+svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_f16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld4q_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
+//
+svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_bf16,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
+//
+svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_f32,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
+//
+svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld4q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
+//
+svmfloat8x4_t test_svld4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) ATTR
+{
+  return SVE_ACLE_FUNC(svld4q_vnum,,_mf8,)(pg, base, vnum);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c
index 7adb3d4940e7f..0a87469887df9 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c
@@ -15,1590 +15,6 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
-// CHECK-LABEL: @test_svld2q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z14test_svld2q_u8u10__SVBool_tPKh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svuint8x2_t test_svld2q_u8(svbool_t pg, const uint8_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_u8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z14test_svld2q_s8u10__SVBool_tPKa(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svint8x2_t test_svld2q_s8(svbool_t pg, const int8_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_s8,)(pg, base);
-}
-// CHECK-LABEL: @test_svld2q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_u16u10__SVBool_tPKt(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svuint16x2_t test_svld2q_u16(svbool_t pg, const uint16_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_u16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_s16u10__SVBool_tPKs(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svint16x2_t test_svld2q_s16(svbool_t pg, const int16_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_s16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_u32u10__SVBool_tPKj(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svuint32x2_t test_svld2q_u32(svbool_t pg, const uint32_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_u32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_s32u10__SVBool_tPKi(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svint32x2_t test_svld2q_s32(svbool_t pg, const int32_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_s32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_u64u10__SVBool_tPKm(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svuint64x2_t test_svld2q_u64(svbool_t pg, const uint64_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_u64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_s64u10__SVBool_tPKl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svint64x2_t test_svld2q_s64(svbool_t pg, const int64_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_s64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_f16u10__SVBool_tPKDh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-svfloat16x2_t test_svld2q_f16(svbool_t pg, const float16_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_f16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z16test_svld2q_bf16u10__SVBool_tPKu6__bf16(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-svbfloat16x2_t test_svld2q_bf16(svbool_t pg, const bfloat16_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_bf16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_f32u10__SVBool_tPKf(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-svfloat32x2_t test_svld2q_f32(svbool_t pg, const float32_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_f32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_f64u10__SVBool_tPKd(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld2q_mf8u10__SVBool_tPKu6__mfp8(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svmfloat8x2_t test_svld2q_mf8(svbool_t pg, const mfloat8_t *base)
-{
-  return SVE_ACLE_FUNC(svld2q,,_mf8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_u8u10__SVBool_tPKhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum_,,u8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_s8u10__SVBool_tPKal(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_s8,)(pg, base, vnum);
-}
-// CHECK-LABEL: @test_svld2q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u16u10__SVBool_tPKtl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_u16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s16u10__SVBool_tPKsl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_s16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u32u10__SVBool_tPKjl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_u32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s32u10__SVBool_tPKil(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_s32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u64u10__SVBool_tPKml(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_u64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s64u10__SVBool_tPKll(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_s64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f16u10__SVBool_tPKDhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_f16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld2q_vnum_bf16u10__SVBool_tPKu6__bf16l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_bf16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f32u10__SVBool_tPKfl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_f32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f64u10__SVBool_tPKdl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld2q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svmfloat8x2_t test_svld2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld2q_vnum,,_mf8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z14test_svld3q_u8u10__SVBool_tPKh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svuint8x3_t test_svld3q_u8(svbool_t pg, const uint8_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_u8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z14test_svld3q_s8u10__SVBool_tPKa(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svint8x3_t test_svld3q_s8(svbool_t pg, const int8_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_s8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_u16u10__SVBool_tPKt(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svuint16x3_t test_svld3q_u16(svbool_t pg, const uint16_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_u16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_s16u10__SVBool_tPKs(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svint16x3_t test_svld3q_s16(svbool_t pg, const int16_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_s16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_u32u10__SVBool_tPKj(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svuint32x3_t test_svld3q_u32(svbool_t pg, const uint32_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_u32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_s32u10__SVBool_tPKi(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svint32x3_t test_svld3q_s32(svbool_t pg, const int32_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_s32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_u64u10__SVBool_tPKm(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svuint64x3_t test_svld3q_u64(svbool_t pg, const uint64_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_u64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_s64u10__SVBool_tPKl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svint64x3_t test_svld3q_s64(svbool_t pg, const int64_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_s64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_f16u10__SVBool_tPKDh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-svfloat16x3_t test_svld3q_f16(svbool_t pg, const float16_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_f16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z16test_svld3q_bf16u10__SVBool_tPKu6__bf16(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-svbfloat16x3_t test_svld3q_bf16(svbool_t pg, const bfloat16_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_bf16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_f32u10__SVBool_tPKf(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-svfloat32x3_t test_svld3q_f32(svbool_t pg, const float32_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_f32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_f64u10__SVBool_tPKd(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld3q_mf8u10__SVBool_tPKu6__mfp8(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svmfloat8x3_t test_svld3q_mf8(svbool_t pg, const mfloat8_t *base)
-{
-  return SVE_ACLE_FUNC(svld3q,,_mf8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_u8u10__SVBool_tPKhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum_,,u8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_s8u10__SVBool_tPKal(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_s8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u16u10__SVBool_tPKtl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_u16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s16u10__SVBool_tPKsl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_s16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u32u10__SVBool_tPKjl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_u32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s32u10__SVBool_tPKil(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_s32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u64u10__SVBool_tPKml(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_u64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s64u10__SVBool_tPKll(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_s64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f16u10__SVBool_tPKDhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_f16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld3q_vnum_bf16u10__SVBool_tPKu6__bf16l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_bf16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f32u10__SVBool_tPKfl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_f32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f64u10__SVBool_tPKdl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld3q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svmfloat8x3_t test_svld3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld3q_vnum,,_mf8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z14test_svld4q_u8u10__SVBool_tPKh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svuint8x4_t test_svld4q_u8(svbool_t pg, const uint8_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_u8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_u16u10__SVBool_tPKt(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svuint16x4_t test_svld4q_u16(svbool_t pg, const uint16_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_u16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_s16u10__SVBool_tPKs(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
-//
-svint16x4_t test_svld4q_s16(svbool_t pg, const int16_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_s16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_u32u10__SVBool_tPKj(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svuint32x4_t test_svld4q_u32(svbool_t pg, const uint32_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_u32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_s32u10__SVBool_tPKi(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
-//
-svint32x4_t test_svld4q_s32(svbool_t pg, const int32_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_s32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_u64u10__SVBool_tPKm(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svuint64x4_t test_svld4q_u64(svbool_t pg, const uint64_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_u64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_s64u10__SVBool_tPKl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
-//
-svint64x4_t test_svld4q_s64(svbool_t pg, const int64_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_s64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_f16u10__SVBool_tPKDh(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
-//
-svfloat16x4_t test_svld4q_f16(svbool_t pg, const float16_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_f16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z16test_svld4q_bf16u10__SVBool_tPKu6__bf16(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
-//
-svbfloat16x4_t test_svld4q_bf16(svbool_t pg, const bfloat16_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_bf16,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_f32u10__SVBool_tPKf(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
-//
-svfloat32x4_t test_svld4q_f32(svbool_t pg, const float32_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_f32,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_f64u10__SVBool_tPKd(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
-//
-svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-// CPP-CHECK-LABEL: @_Z15test_svld4q_mf8u10__SVBool_tPKu6__mfp8(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
-//
-svmfloat8x4_t test_svld4q_mf8(svbool_t pg, const mfloat8_t *base)
-{
-  return SVE_ACLE_FUNC(svld4q,,_mf8,)(pg, base);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_u8u10__SVBool_tPKhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum_,,u8,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_s8u10__SVBool_tPKal(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_s8,)(pg, base, vnum);
-}
-// CHECK-LABEL: @test_svld4q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u16u10__SVBool_tPKtl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_u16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s16u10__SVBool_tPKsl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
-//
-svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_s16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u32u10__SVBool_tPKjl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_u32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s32u10__SVBool_tPKil(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
-//
-svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_s32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u64u10__SVBool_tPKml(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_u64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s64u10__SVBool_tPKll(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
-//
-svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_s64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f16u10__SVBool_tPKDhl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
-//
-svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_f16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld4q_vnum_bf16u10__SVBool_tPKu6__bf16l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
-//
-svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_bf16,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f32u10__SVBool_tPKfl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
-//
-svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_f32,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f64u10__SVBool_tPKdl(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
-//
-svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum);
-}
-
-// CHECK-LABEL: @test_svld4q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_mf8u10__SVBool_tPKu6__mfp8l(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
-//
-svmfloat8x4_t test_svld4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum)
-{
-  return SVE_ACLE_FUNC(svld4q_vnum,,_mf8,)(pg, base, vnum);
-}
-
 // Gather for 128 bits
 // vector base + scalar offset
 // CHECK-LABEL: @test_svld1q_gather_u64base_offset_u64(
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
index eb38d14c78812..455ab25a343a5 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -21,6 +21,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i1> @test_svpmov_lane_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -33,7 +41,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpmov_lane_u8(svuint8_t zn) {
+svbool_t test_svpmov_lane_u8(svuint8_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u8)(zn, 0);
 }
 
@@ -49,7 +57,7 @@ svbool_t test_svpmov_lane_u8(svuint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpmov_lane_s8(svint8_t zn) {
+svbool_t test_svpmov_lane_s8(svint8_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s8)(zn, 0);
 }
 
@@ -67,7 +75,7 @@ svbool_t test_svpmov_lane_s8(svint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_u16(svuint16_t zn) {
+svbool_t test_svpmov_lane_u16(svuint16_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u16)(zn, 0);
 }
 
@@ -85,7 +93,7 @@ svbool_t test_svpmov_lane_u16(svuint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_s16(svint16_t zn) {
+svbool_t test_svpmov_lane_s16(svint16_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s16)(zn, 1);
 }
 
@@ -103,7 +111,7 @@ svbool_t test_svpmov_lane_s16(svint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_u32(svuint32_t zn) {
+svbool_t test_svpmov_lane_u32(svuint32_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u32)(zn, 0);
 }
 
@@ -121,7 +129,7 @@ svbool_t test_svpmov_lane_u32(svuint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_s32(svint32_t zn) {
+svbool_t test_svpmov_lane_s32(svint32_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s32)(zn, 3);
 }
 
@@ -139,7 +147,7 @@ svbool_t test_svpmov_lane_s32(svint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_u64(svuint64_t zn) {
+svbool_t test_svpmov_lane_u64(svuint64_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u64)(zn, 0);
 }
 
@@ -157,7 +165,7 @@ svbool_t test_svpmov_lane_u64(svuint64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_lane_s64(svint64_t zn) {
+svbool_t test_svpmov_lane_s64(svint64_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s64)(zn, 7);
 }
 
@@ -174,7 +182,7 @@ svbool_t test_svpmov_lane_s64(svint64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpmov_u8(svuint8_t zn) {
+svbool_t test_svpmov_u8(svuint8_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _u8)(zn);
 }
 
@@ -190,7 +198,7 @@ svbool_t test_svpmov_u8(svuint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.zero.nxv16i8(<vscale x 16 x i8> [[ZN]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpmov_s8(svint8_t zn) {
+svbool_t test_svpmov_s8(svint8_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _s8)(zn);
 }
 
@@ -208,7 +216,7 @@ svbool_t test_svpmov_s8(svint8_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_u16(svuint16_t zn) {
+svbool_t test_svpmov_u16(svuint16_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _u16)(zn);
 }
 
@@ -226,7 +234,7 @@ svbool_t test_svpmov_u16(svuint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_s16(svint16_t zn) {
+svbool_t test_svpmov_s16(svint16_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _s16)(zn);
 }
 
@@ -244,7 +252,7 @@ svbool_t test_svpmov_s16(svint16_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_u32(svuint32_t zn) {
+svbool_t test_svpmov_u32(svuint32_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _u32)(zn);
 }
 
@@ -262,7 +270,7 @@ svbool_t test_svpmov_u32(svuint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_s32(svint32_t zn) {
+svbool_t test_svpmov_s32(svint32_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _s32)(zn);
 }
 
@@ -280,7 +288,7 @@ svbool_t test_svpmov_s32(svint32_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_u64(svuint64_t zn) {
+svbool_t test_svpmov_u64(svuint64_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _u64)(zn);
 }
 
@@ -298,7 +306,7 @@ svbool_t test_svpmov_u64(svuint64_t zn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpmov_s64(svint64_t zn) {
+svbool_t test_svpmov_s64(svint64_t zn) ATTR {
   return SVE_ACLE_FUNC(svpmov, _s64)(zn);
 }
 
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
index 13e2eb25b8372..c827c6b2956ac 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
@@ -21,6 +21,14 @@
 #define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // _m
 
 // CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svpmov_lane_u16_m
@@ -37,7 +45,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svpmov_lane_u16_m(svuint16_t zn, svbool_t pn) {
+svuint16_t test_svpmov_lane_u16_m(svuint16_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u16, _m)(zn, pn, 1);
 }
 
@@ -55,7 +63,7 @@ svuint16_t test_svpmov_lane_u16_m(svuint16_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i1> [[TMP0]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svpmov_lane_s16_m(svint16_t zn, svbool_t pn) {
+svint16_t test_svpmov_lane_s16_m(svint16_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s16, _m)(zn, pn, 1);
 }
 
@@ -73,7 +81,7 @@ svint16_t test_svpmov_lane_s16_m(svint16_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svpmov_lane_u32_m(svuint32_t zn, svbool_t pn) {
+svuint32_t test_svpmov_lane_u32_m(svuint32_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u32, _m)(zn, pn, 1);
 }
 
@@ -91,7 +99,7 @@ svuint32_t test_svpmov_lane_u32_m(svuint32_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i1> [[TMP0]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svpmov_lane_s32_m(svint32_t zn, svbool_t pn) {
+svint32_t test_svpmov_lane_s32_m(svint32_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s32, _m)(zn, pn, 3);
 }
 
@@ -109,7 +117,7 @@ svint32_t test_svpmov_lane_s32_m(svint32_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svpmov_lane_u64_m(svuint64_t zn, svbool_t pn) {
+svuint64_t test_svpmov_lane_u64_m(svuint64_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _u64, _m)(zn, pn, 1);
 }
 
@@ -127,7 +135,7 @@ svuint64_t test_svpmov_lane_u64_m(svuint64_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i1> [[TMP0]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svpmov_lane_s64_m(svint64_t zn, svbool_t pn) {
+svint64_t test_svpmov_lane_s64_m(svint64_t zn, svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_lane, _s64, _m)(zn, pn, 7);
 }
 
@@ -146,7 +154,7 @@ svint64_t test_svpmov_lane_s64_m(svint64_t zn, svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svpmov_lane_u8_z(svbool_t pn) {
+svuint8_t test_svpmov_lane_u8_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_u8, , _z)(pn);
 }
 
@@ -162,7 +170,7 @@ svuint8_t test_svpmov_lane_u8_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> [[PN]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svpmov_lane_s8_z(svbool_t pn) {
+svint8_t test_svpmov_lane_s8_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_s8, , _z)(pn);
 }
 
@@ -180,7 +188,7 @@ svint8_t test_svpmov_lane_s8_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svpmov_lane_u16_z(svbool_t pn) {
+svuint16_t test_svpmov_lane_u16_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_u16, , _z)(pn);
 }
 
@@ -198,7 +206,7 @@ svuint16_t test_svpmov_lane_u16_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svpmov_lane_s16_z(svbool_t pn) {
+svint16_t test_svpmov_lane_s16_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_s16, , _z)(pn);
 }
 
@@ -216,7 +224,7 @@ svint16_t test_svpmov_lane_s16_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svpmov_lane_u32_z(svbool_t pn) {
+svuint32_t test_svpmov_lane_u32_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_u32, , _z)(pn);
 }
 
@@ -234,7 +242,7 @@ svuint32_t test_svpmov_lane_u32_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svpmov_lane_s32_z(svbool_t pn) {
+svint32_t test_svpmov_lane_s32_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_s32, , _z)(pn);
 }
 
@@ -252,7 +260,7 @@ svint32_t test_svpmov_lane_s32_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svpmov_lane_u64_z(svbool_t pn) {
+svuint64_t test_svpmov_lane_u64_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_u64, , _z)(pn);
 }
 
@@ -270,7 +278,7 @@ svuint64_t test_svpmov_lane_u64_z(svbool_t pn) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svpmov_lane_s64_z(svbool_t pn) {
+svint64_t test_svpmov_lane_s64_z(svbool_t pn) ATTR {
   return SVE_ACLE_FUNC(svpmov_s64, , _z)(pn);
 }
 
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c
index 517d5f244a46f..c747fc025c74c 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c
@@ -14,1609 +14,6 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
-// CHECK-LABEL: @test_svst2q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst2q_u8u10__SVBool_tPKh11svuint8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_u8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst2q_s8u10__SVBool_tPKa10svint8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_s8,)(pg, base, zt);
-}
-// CHECK-LABEL: @test_svst2q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_u16u10__SVBool_tPKt12svuint16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_u16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_s16u10__SVBool_tPKs11svint16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_s16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_u32u10__SVBool_tPKj12svuint32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_u32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_s32u10__SVBool_tPKi11svint32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_s32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_u64u10__SVBool_tPKm12svuint64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_u64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_s64u10__SVBool_tPKl11svint64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_s64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_f16u10__SVBool_tPKDh13svfloat16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_f16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svst2q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_bf16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_f32u10__SVBool_tPKf13svfloat32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_f32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_f64u10__SVBool_tPKd13svfloat64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst2q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q,,_mf8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_u8u10__SVBool_tPKhl11svuint8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum_,,u8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_s8u10__SVBool_tPKal10svint8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_s8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u16u10__SVBool_tPKtl12svuint16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_u16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s16u10__SVBool_tPKsl11svint16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_s16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u32u10__SVBool_tPKjl12svuint32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_u32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s32u10__SVBool_tPKil11svint32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_s32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u64u10__SVBool_tPKml12svuint64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_u64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s64u10__SVBool_tPKll11svint64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_s64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f16u10__SVBool_tPKDhl13svfloat16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_f16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z21test_svst2q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_bf16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f32u10__SVBool_tPKfl13svfloat32x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_f32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f64u10__SVBool_tPKdl13svfloat64x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst2q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x2_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x2_t zt)
-{
-  SVE_ACLE_FUNC(svst2q_vnum,,_mf8,)(pg, base, vnum, zt);
-}
-
-//
-// ST3Q
-// CHECK-LABEL: @test_svst3q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst3q_u8u10__SVBool_tPKh11svuint8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_u8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst3q_s8u10__SVBool_tPKa10svint8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_s8,)(pg, base, zt);
-}
-// CHECK-LABEL: @test_svst3q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_u16u10__SVBool_tPKt12svuint16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_u16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_s16u10__SVBool_tPKs11svint16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_s16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_u32u10__SVBool_tPKj12svuint32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_u32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_s32u10__SVBool_tPKi11svint32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_s32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_u64u10__SVBool_tPKm12svuint64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_u64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_s64u10__SVBool_tPKl11svint64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_s64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_f16u10__SVBool_tPKDh13svfloat16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_f16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svst3q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_bf16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_f32u10__SVBool_tPKf13svfloat32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_f32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_f64u10__SVBool_tPKd13svfloat64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst3q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q,,_mf8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_u8u10__SVBool_tPKhl11svuint8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum_,,u8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_s8u10__SVBool_tPKal10svint8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_s8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u16u10__SVBool_tPKtl12svuint16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_u16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s16u10__SVBool_tPKsl11svint16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_s16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u32u10__SVBool_tPKjl12svuint32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_u32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s32u10__SVBool_tPKil11svint32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_s32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u64u10__SVBool_tPKml12svuint64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_u64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s64u10__SVBool_tPKll11svint64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_s64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f16u10__SVBool_tPKDhl13svfloat16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_f16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z21test_svst3q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_bf16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f32u10__SVBool_tPKfl13svfloat32x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_f32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f64u10__SVBool_tPKdl13svfloat64x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst3q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x3_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x3_t zt)
-{
-  SVE_ACLE_FUNC(svst3q_vnum,,_mf8,)(pg, base, vnum, zt);
-}
-
-//
-// ST4Q
-// CHECK-LABEL: @test_svst4q_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst4q_u8u10__SVBool_tPKh11svuint8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_u8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z14test_svst4q_s8u10__SVBool_tPKa10svint8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_s8,)(pg, base, zt);
-}
-// CHECK-LABEL: @test_svst4q_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_u16u10__SVBool_tPKt12svuint16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_u16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_s16u10__SVBool_tPKs11svint16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_s16(svbool_t pg, const int16_t *base, svint16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_s16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_u32u10__SVBool_tPKj12svuint32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_u32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_s32u10__SVBool_tPKi11svint32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_s32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_u64u10__SVBool_tPKm12svuint64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_u64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_s64u10__SVBool_tPKl11svint64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_s64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_f16u10__SVBool_tPKDh13svfloat16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_f16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svst4q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_bf16,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_f32u10__SVBool_tPKf13svfloat32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_f32,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_f64u10__SVBool_tPKd13svfloat64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z15test_svst4q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q,,_mf8,)(pg, base, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_u8u10__SVBool_tPKhl11svuint8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum_,,u8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_s8u10__SVBool_tPKal10svint8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_s8,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u16u10__SVBool_tPKtl12svuint16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_u16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s16u10__SVBool_tPKsl11svint16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_s16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u32u10__SVBool_tPKjl12svuint32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_u32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s32u10__SVBool_tPKil11svint32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_s32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_u64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u64u10__SVBool_tPKml12svuint64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_u64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_s64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s64u10__SVBool_tPKll11svint64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_s64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f16u10__SVBool_tPKDhl13svfloat16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_f16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_bf16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z21test_svst4q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_bf16,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f32u10__SVBool_tPKfl13svfloat32x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_f32,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_f64(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f64u10__SVBool_tPKdl13svfloat64x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt);
-}
-
-// CHECK-LABEL: @test_svst4q_vnum_mf8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x4_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svst4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x4_t zt)
-{
-  SVE_ACLE_FUNC(svst4q_vnum,,_mf8,)(pg, base, vnum, zt);
-}
-
 // Scatter for 128 bits
 // vector base + scalar offset
 // CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64(
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c
new file mode 100644
index 0000000000000..5b37c2052fe2c
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c
@@ -0,0 +1,1626 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+// CHECK-LABEL: @test_svst2q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst2q_u8u10__SVBool_tPKh11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_u8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst2q_s8u10__SVBool_tPKa10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_s8,)(pg, base, zt);
+}
+// CHECK-LABEL: @test_svst2q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_u16u10__SVBool_tPKt12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_u16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_s16u10__SVBool_tPKs11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_s16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_u32u10__SVBool_tPKj12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_u32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_s32u10__SVBool_tPKi11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_s32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_u64u10__SVBool_tPKm12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_u64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_s64u10__SVBool_tPKl11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_s64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_f16u10__SVBool_tPKDh13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_f16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst2q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_bf16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_f32u10__SVBool_tPKf13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_f32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_f64u10__SVBool_tPKd13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst2q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q,,_mf8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_u8u10__SVBool_tPKhl11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum_,,u8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_s8u10__SVBool_tPKal10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_s8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u16u10__SVBool_tPKtl12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_u16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s16u10__SVBool_tPKsl11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_s16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u32u10__SVBool_tPKjl12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_u32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s32u10__SVBool_tPKil11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_s32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u64u10__SVBool_tPKml12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_u64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s64u10__SVBool_tPKll11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_s64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f16u10__SVBool_tPKDhl13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_f16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst2q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_bf16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f32u10__SVBool_tPKfl13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_f32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f64u10__SVBool_tPKdl13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst2q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x2_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst2q_vnum,,_mf8,)(pg, base, vnum, zt);
+}
+
+//
+// ST3Q
+// CHECK-LABEL: @test_svst3q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst3q_u8u10__SVBool_tPKh11svuint8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_u8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst3q_s8u10__SVBool_tPKa10svint8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_s8,)(pg, base, zt);
+}
+// CHECK-LABEL: @test_svst3q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_u16u10__SVBool_tPKt12svuint16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_u16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_s16u10__SVBool_tPKs11svint16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_s16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_u32u10__SVBool_tPKj12svuint32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_u32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_s32u10__SVBool_tPKi11svint32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_s32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_u64u10__SVBool_tPKm12svuint64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_u64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_s64u10__SVBool_tPKl11svint64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_s64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_f16u10__SVBool_tPKDh13svfloat16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_f16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst3q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_bf16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_f32u10__SVBool_tPKf13svfloat32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_f32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_f64u10__SVBool_tPKd13svfloat64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst3q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q,,_mf8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_u8u10__SVBool_tPKhl11svuint8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum_,,u8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_s8u10__SVBool_tPKal10svint8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_s8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u16u10__SVBool_tPKtl12svuint16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_u16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s16u10__SVBool_tPKsl11svint16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_s16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u32u10__SVBool_tPKjl12svuint32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_u32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s32u10__SVBool_tPKil11svint32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_s32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u64u10__SVBool_tPKml12svuint64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_u64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s64u10__SVBool_tPKll11svint64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_s64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f16u10__SVBool_tPKDhl13svfloat16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_f16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst3q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_bf16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f32u10__SVBool_tPKfl13svfloat32x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_f32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f64u10__SVBool_tPKdl13svfloat64x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst3q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x3_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x3_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst3q_vnum,,_mf8,)(pg, base, vnum, zt);
+}
+
+//
+// ST4Q
+// CHECK-LABEL: @test_svst4q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst4q_u8u10__SVBool_tPKh11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_u8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svst4q_s8u10__SVBool_tPKa10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_s8,)(pg, base, zt);
+}
+// CHECK-LABEL: @test_svst4q_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_u16u10__SVBool_tPKt12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_u16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_s16u10__SVBool_tPKs11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_s16(svbool_t pg, const int16_t *base, svint16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_s16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_u32u10__SVBool_tPKj12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_u32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_s32u10__SVBool_tPKi11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_s32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_u64u10__SVBool_tPKm12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_u64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_s64u10__SVBool_tPKl11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_s64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_f16u10__SVBool_tPKDh13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_f16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst4q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_bf16,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_f32u10__SVBool_tPKf13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_f32,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_f64u10__SVBool_tPKd13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svst4q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q,,_mf8,)(pg, base, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_u8u10__SVBool_tPKhl11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum_,,u8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_s8u10__SVBool_tPKal10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_s8,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u16u10__SVBool_tPKtl12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_u16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s16u10__SVBool_tPKsl11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_s16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u32u10__SVBool_tPKjl12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_u32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s32u10__SVBool_tPKil11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_s32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u64u10__SVBool_tPKml12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_u64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s64u10__SVBool_tPKll11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_s64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f16u10__SVBool_tPKDhl13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_f16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst4q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_bf16,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f32u10__SVBool_tPKfl13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_f32,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f64u10__SVBool_tPKdl13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt);
+}
+
+// CHECK-LABEL: @test_svst4q_vnum_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x4_t zt) ATTR
+{
+  SVE_ACLE_FUNC(svst4q_vnum,,_mf8,)(pg, base, vnum, zt);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
index 3d3bb0d17a50c..c1a7d2d443fba 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -21,6 +21,14 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtblq_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -33,7 +41,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) ATTR {
   return SVE_ACLE_FUNC(svtblq, _u8,,)(zn, zm);
 }
 
@@ -49,7 +57,7 @@ svuint8_t test_svtblq_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svtblq, _u16,,)(zn, zm);
 }
 
@@ -65,7 +73,7 @@ svuint16_t test_svtblq_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svtblq, _u32,,)(zn, zm);
 }
 
@@ -81,7 +89,7 @@ svuint32_t test_svtblq_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svtblq_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svtblq_u64(svuint64_t zn, svuint64_t zm) ATTR {
   return SVE_ACLE_FUNC(svtblq, _u64,,)(zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svtblq_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svtblq_s8(svint8_t zn, svuint8_t zm) {
+svint8_t test_svtblq_s8(svint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _s8,,)(zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svtblq_s8(svint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svtblq_s16(svint16_t zn, svuint16_t zm) {
+svint16_t test_svtblq_s16(svint16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _s16,,)(zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svtblq_s16(svint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svtblq_s32(svint32_t zn, svuint32_t zm) {
+svint32_t test_svtblq_s32(svint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _s32,,)(zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svtblq_s32(svint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svtblq_s64(svint64_t zn, svuint64_t zm) {
+svint64_t test_svtblq_s64(svint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _s64,,)(zn, zm);
 }
 
@@ -161,7 +169,7 @@ svint64_t test_svtblq_s64(svint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svtblq_f16(svfloat16_t zn, svuint16_t zm) {
+svfloat16_t test_svtblq_f16(svfloat16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _f16,,)(zn, zm);
 }
 
@@ -177,7 +185,7 @@ svfloat16_t test_svtblq_f16(svfloat16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svtblq_f32(svfloat32_t zn, svuint32_t zm) {
+svfloat32_t test_svtblq_f32(svfloat32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _f32,,)(zn, zm);
 }
 
@@ -193,7 +201,7 @@ svfloat32_t test_svtblq_f32(svfloat32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
+svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _f64,,)(zn, zm);
 }
 
@@ -209,7 +217,7 @@ svfloat64_t test_svtblq_f64(svfloat64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) {
+svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _bf16,,)(zn, zm);
 }
 
@@ -225,6 +233,6 @@ svbfloat16_t test_svtblq_bf16(svbfloat16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svtblq_mf8(svmfloat8_t zn, svuint8_t zm) {
+svmfloat8_t test_svtblq_mf8(svmfloat8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svtblq, _mf8,,)(zn, zm);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
index 674bd9cbb083b..f81d4c17606b3 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -21,6 +21,14 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svtbxq_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -33,7 +41,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) ATTR {
   return SVE_ACLE_FUNC(svtbxq, _u8,,)(passthru, zn, zm);
 }
 
@@ -49,7 +57,7 @@ svuint8_t test_svtbxq_u8(svuint8_t passthru, svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svtbxq, _u16,,)(passthru, zn, zm);
 }
 
@@ -65,7 +73,7 @@ svuint16_t test_svtbxq_u16(svuint16_t passthru, svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svtbxq, _u32,,)(passthru, zn, zm);
 }
 
@@ -81,7 +89,7 @@ svuint32_t test_svtbxq_u32(svuint32_t passthru, svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) ATTR {
   return SVE_ACLE_FUNC(svtbxq, _u64,,)(passthru, zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svtbxq_u64(svuint64_t passthru, svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
+svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _s8,,)(passthru, zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svtbxq_s8(svint8_t passthru, svint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> [[PASSTHRU]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
+svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _s16,,)(passthru, zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svtbxq_s16(svint16_t passthru, svint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> [[PASSTHRU]], <vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
+svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _s32,,)(passthru, zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svtbxq_s32(svint32_t passthru, svint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> [[PASSTHRU]], <vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
+svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _s64,,)(passthru, zn, zm);
 }
 
@@ -161,7 +169,7 @@ svint64_t test_svtbxq_s64(svint64_t passthru, svint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> [[PASSTHRU]], <vscale x 8 x half> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm) {
+svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _f16,,)(passthru, zn, zm);
 }
 
@@ -177,7 +185,7 @@ svfloat16_t test_svtbxq_f16(svfloat16_t passthru, svfloat16_t zn, svuint16_t zm)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> [[PASSTHRU]], <vscale x 4 x float> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm) {
+svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _f32,,)(passthru, zn, zm);
 }
 
@@ -193,7 +201,7 @@ svfloat32_t test_svtbxq_f32(svfloat32_t passthru, svfloat32_t zn, svuint32_t zm)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> [[PASSTHRU]], <vscale x 2 x double> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm) {
+svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _f64,,)(passthru, zn, zm);
 }
 
@@ -209,7 +217,7 @@ svfloat64_t test_svtbxq_f64(svfloat64_t passthru, svfloat64_t zn, svuint64_t zm)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> [[PASSTHRU]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t zm) {
+svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _bf16,,)(passthru, zn, zm);
 }
 
@@ -225,6 +233,6 @@ svbfloat16_t test_svtbxq_bf16(svbfloat16_t passthru, svbfloat16_t zn, svuint16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> [[PASSTHRU]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svtbxq_mf8(svmfloat8_t passthru, svmfloat8_t zn, svuint8_t zm) {
+svmfloat8_t test_svtbxq_mf8(svmfloat8_t passthru, svmfloat8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svtbxq, _mf8,,)(passthru, zn, zm);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index 35878d61f9549..cfdb651e4734d 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq1_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_u8)(zn, zm);
 }
 
@@ -48,7 +56,7 @@ svuint8_t test_svuzpq1_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svuzpq1,_u16)(zn, zm);
 }
 
@@ -64,7 +72,7 @@ svuint16_t test_svuzpq1_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_u32)(zn, zm);
 }
 
@@ -80,7 +88,7 @@ svuint32_t test_svuzpq1_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_u64)(zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svuzpq1_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
+svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_s8)(zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svuzpq1_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
+svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_s16)(zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svuzpq1_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
+svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svuzpq1,_s32)(zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svuzpq1_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
+svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_s64)(zn, zm);
 }
 
@@ -162,7 +170,7 @@ svint64_t test_svuzpq1_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
+svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_f16)(zn, zm);
 }
 
@@ -178,7 +186,7 @@ svfloat16_t test_svuzpq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
+svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_f32)(zn, zm);
 }
 
@@ -194,7 +202,7 @@ svfloat32_t test_svuzpq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
+svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_f64)(zn, zm);
 }
 
@@ -210,7 +218,7 @@ svfloat64_t test_svuzpq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_bf16)(zn, zm);
 }
 
@@ -226,7 +234,7 @@ svbfloat16_t test_svuzpq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svuzpq1_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+svmfloat8_t test_svuzpq1_mf8(svmfloat8_t zn, svmfloat8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq1,_mf8)(zn, zm);
 }
 
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index a22f20f30039f..fd4678874cb5b 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svuzpq2_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_u8)(zn, zm);
 }
 
@@ -48,7 +56,7 @@ svuint8_t test_svuzpq2_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svuzpq2,_u16)(zn, zm);
 }
 
@@ -64,7 +72,7 @@ svuint16_t test_svuzpq2_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_u32)(zn, zm);
 }
 
@@ -80,7 +88,7 @@ svuint32_t test_svuzpq2_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_u64)(zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svuzpq2_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
+svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_s8)(zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svuzpq2_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
+svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_s16)(zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svuzpq2_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
+svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svuzpq2,_s32)(zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svuzpq2_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
+svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_s64)(zn, zm);
 }
 
@@ -162,7 +170,7 @@ svint64_t test_svuzpq2_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
+svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_f16)(zn, zm);
 }
 
@@ -178,7 +186,7 @@ svfloat16_t test_svuzpq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
+svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_f32)(zn, zm);
 }
 
@@ -194,7 +202,7 @@ svfloat32_t test_svuzpq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
+svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_f64)(zn, zm);
 }
 
@@ -210,7 +218,7 @@ svfloat64_t test_svuzpq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_bf16)(zn, zm);
 }
 
@@ -226,6 +234,6 @@ svbfloat16_t test_svuzpq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svuzpq2_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+svmfloat8_t test_svuzpq2_mf8(svmfloat8_t zn, svmfloat8_t zm) ATTR {
     return SVE_ACLE_FUNC(svuzpq2,_mf8)(zn, zm);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
index 4366973062697..20e145c16196e 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq1_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_u8)(zn, zm);
 }
 
@@ -48,7 +56,7 @@ svuint8_t test_svzipq1_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svzipq1,_u16)(zn, zm);
 }
 
@@ -64,7 +72,7 @@ svuint16_t test_svzipq1_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_u32)(zn, zm);
 }
 
@@ -80,7 +88,7 @@ svuint32_t test_svzipq1_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_u64)(zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svzipq1_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
+svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_s8)(zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svzipq1_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
+svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_s16)(zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svzipq1_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
+svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svzipq1,_s32)(zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svzipq1_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
+svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_s64)(zn, zm);
 }
 
@@ -162,7 +170,7 @@ svint64_t test_svzipq1_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
+svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_f16)(zn, zm);
 }
 
@@ -178,7 +186,7 @@ svfloat16_t test_svzipq1_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
+svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_f32)(zn, zm);
 }
 
@@ -194,7 +202,7 @@ svfloat32_t test_svzipq1_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
+svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_f64)(zn, zm);
 }
 
@@ -210,7 +218,7 @@ svfloat64_t test_svzipq1_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_bf16)(zn, zm);
 }
 
@@ -226,6 +234,6 @@ svbfloat16_t test_svzipq1_bf16(svbfloat16_t zn, svbfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svzipq1_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+svmfloat8_t test_svzipq1_mf8(svmfloat8_t zn, svmfloat8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq1,_mf8)(zn, zm);
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 4e27ec463c08e..894f27e1530b5 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -20,6 +20,14 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
 // CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svzipq2_u8
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
+svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_u8)(zn, zm);
 }
 
@@ -48,7 +56,7 @@ svuint8_t test_svzipq2_u8(svuint8_t zn, svuint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
+svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) ATTR {
   return SVE_ACLE_FUNC(svzipq2,_u16)(zn, zm);
 }
 
@@ -64,7 +72,7 @@ svuint16_t test_svzipq2_u16(svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
+svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_u32)(zn, zm);
 }
 
@@ -80,7 +88,7 @@ svuint32_t test_svzipq2_u32(svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
+svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_u64)(zn, zm);
 }
 
@@ -97,7 +105,7 @@ svuint64_t test_svzipq2_u64(svuint64_t zn, svuint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
+svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_s8)(zn, zm);
 }
 
@@ -113,7 +121,7 @@ svint8_t test_svzipq2_s8(svint8_t zn, svint8_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
+svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_s16)(zn, zm);
 }
 
@@ -129,7 +137,7 @@ svint16_t test_svzipq2_s16(svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
+svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) ATTR {
   return SVE_ACLE_FUNC(svzipq2,_s32)(zn, zm);
 }
 
@@ -145,7 +153,7 @@ svint32_t test_svzipq2_s32(svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
+svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_s64)(zn, zm);
 }
 
@@ -162,7 +170,7 @@ svint64_t test_svzipq2_s64(svint64_t zn, svint64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
+svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_f16)(zn, zm);
 }
 
@@ -178,7 +186,7 @@ svfloat16_t test_svzipq2_f16(svfloat16_t zn, svfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
+svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_f32)(zn, zm);
 }
 
@@ -194,7 +202,7 @@ svfloat32_t test_svzipq2_f32(svfloat32_t zn, svfloat32_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
+svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_f64)(zn, zm);
 }
 
@@ -210,7 +218,7 @@ svfloat64_t test_svzipq2_f64(svfloat64_t zn, svfloat64_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
+svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_bf16)(zn, zm);
 }
 
@@ -226,6 +234,6 @@ svbfloat16_t test_svzipq2_bf16(svbfloat16_t zn, svbfloat16_t zm) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svmfloat8_t test_svzipq2_mf8(svmfloat8_t zn, svmfloat8_t zm) {
+svmfloat8_t test_svzipq2_mf8(svmfloat8_t zn, svmfloat8_t zm) ATTR {
     return SVE_ACLE_FUNC(svzipq2,_mf8)(zn, zm);
 }

>From 5791cd5e415d995f62e18d706b5bec3a82530957 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Mon, 7 Jul 2025 17:44:16 +0100
Subject: [PATCH 2/2] [Clang][AArch64] Add missing builtins for
 __ARM_FEATURE_SME2p1.

The quadword vector instructions introduced by SVE2p1/SME2p1 but the
builtins were not available to streaming mode.

RAX1 is available in streaming mode when SME2p1 is available.
---
 clang/include/clang/Basic/arm_sve.td          | 114 +++++++++---------
 .../AArch64/sve2-intrinsics/acle_sve2_rax1.c  |  10 +-
 .../sve2p1-intrinsics/acle_sve2p1_dupq.c      |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_extq.c      |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_fp_reduce.c |   3 +
 .../acle_sve2p1_int_reduce.c                  |   3 +
 .../acle_sve2p1_load_struct.c                 |   3 +
 .../acle_sve2p1_pmov_to_pred.c                |   6 +
 .../acle_sve2p1_pmov_to_vector.c              |   6 +
 .../acle_sve2p1_store_struct.c                |   3 +
 .../sve2p1-intrinsics/acle_sve2p1_tblq.c      |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_tbxq.c      |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_uzpq1.c     |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_uzpq2.c     |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_zipq1.c     |   6 +
 .../sve2p1-intrinsics/acle_sve2p1_zipq2.c     |   6 +
 .../acle_sve2_aes_bitperm_sha3_sm4.cpp        |   9 +-
 17 files changed, 141 insertions(+), 64 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index ac6f89f9afdbc..f0a8b32bf2f88 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -320,16 +320,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   // Load one vector (scalar base + vector offset)
   def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">;
 
-  // Load N-element structure into N vectors (scalar base)
-  defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">;
-  defm SVLD3Q : StructLoad<"svld3q[_{2}]", "3Pc", "aarch64_sve_ld3q_sret">;
-  defm SVLD4Q : StructLoad<"svld4q[_{2}]", "4Pc", "aarch64_sve_ld4q_sret">;
-
-  // Load N-element structure into N vectors (scalar base, VL displacement)
-  defm SVLD2Q_VNUM : StructLoad<"svld2q_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2q_sret">;
-  defm SVLD3Q_VNUM : StructLoad<"svld3q_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3q_sret">;
-  defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret">;
-
   // Load quadwords (scalar base + vector index)
   def SVLD1Q_GATHER_INDICES_U : MInst<"svld1q_gather_[{3}]index[_{d}]", "dPcg", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_index">;
 
@@ -337,6 +327,18 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   def SVLD1Q_GATHER_INDEX_S   : MInst<"svld1q_gather[_{2}base]_index_{d}", "dPgl", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
 }
 
+let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
+  // Load N-element structure into N vectors (scalar base)
+  defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret", [VerifyRuntimeMode]>;
+  defm SVLD3Q : StructLoad<"svld3q[_{2}]", "3Pc", "aarch64_sve_ld3q_sret", [VerifyRuntimeMode]>;
+  defm SVLD4Q : StructLoad<"svld4q[_{2}]", "4Pc", "aarch64_sve_ld4q_sret", [VerifyRuntimeMode]>;
+
+  // Load N-element structure into N vectors (scalar base, VL displacement)
+  defm SVLD2Q_VNUM : StructLoad<"svld2q_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2q_sret", [VerifyRuntimeMode]>;
+  defm SVLD3Q_VNUM : StructLoad<"svld3q_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3q_sret", [VerifyRuntimeMode]>;
+  defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret", [VerifyRuntimeMode]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Stores
 
@@ -477,16 +479,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
   def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
 
-  // Store N vectors into N-element structure (scalar base)
-  defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">;
-  defm SVST3Q : StructStore<"svst3q[_{d}]", "vPc3", "aarch64_sve_st3q">;
-  defm SVST4Q : StructStore<"svst4q[_{d}]", "vPc4", "aarch64_sve_st4q">;
-
-  // Store N vectors into N-element structure (scalar base, VL displacement)
-  defm SVST2Q_VNUM : StructStore<"svst2q_vnum[_{d}]", "vPcl2", "aarch64_sve_st2q">;
-  defm SVST3Q_VNUM : StructStore<"svst3q_vnum[_{d}]", "vPcl3", "aarch64_sve_st3q">;
-  defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q">;
-
   // Scatter store quadwords (scalar base + vector index)
   def SVST1Q_SCATTER_INDICES_U : MInst<"svst1q_scatter_[{3}]index[_{d}]", "vPpgd", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_index">;
   def SVST1Q_SCATTER_INDICES_S : MInst<"svst1q_scatter_[{3}]index[_{d}]", "vPp#d", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_index">;
@@ -495,6 +487,18 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   def SVST1Q_SCATTER_INDEX_S   : MInst<"svst1q_scatter[_{2}base]_index[_{d}]", "vPgld", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
 }
 
+let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
+  // Store N vectors into N-element structure (scalar base)
+  defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q", [VerifyRuntimeMode]>;
+  defm SVST3Q : StructStore<"svst3q[_{d}]", "vPc3", "aarch64_sve_st3q", [VerifyRuntimeMode]>;
+  defm SVST4Q : StructStore<"svst4q[_{d}]", "vPc4", "aarch64_sve_st4q", [VerifyRuntimeMode]>;
+
+  // Store N vectors into N-element structure (scalar base, VL displacement)
+  defm SVST2Q_VNUM : StructStore<"svst2q_vnum[_{d}]", "vPcl2", "aarch64_sve_st2q", [VerifyRuntimeMode]>;
+  defm SVST3Q_VNUM : StructStore<"svst3q_vnum[_{d}]", "vPcl3", "aarch64_sve_st3q", [VerifyRuntimeMode]>;
+  defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q", [VerifyRuntimeMode]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Prefetches
 
@@ -1979,8 +1983,8 @@ def SVPMULLT_PAIR_U64   : SInst<"svpmullt_pair[_{d}]",   "ddd", "Ul", MergeNone,
 def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair">;
 }
 
-let SVETargetGuard = "sve2-sha3", SMETargetGuard = InvalidMode in { // FIXME: valid from sme2p1.
-def SVRAX1   : SInst<"svrax1[_{d}]",   "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone]>;
+let SVETargetGuard = "sve-sha3", SMETargetGuard = "sve-sha3,sme2p1" in {
+def SVRAX1   : SInst<"svrax1[_{d}]",   "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
 }
 
 let SVETargetGuard = "sve2-sm4", SMETargetGuard = InvalidMode in {
@@ -2005,21 +2009,21 @@ def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [VerifyRunt
 }
 
 // Standalone sve2.1 builtins
-let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
-def SVORQV   : SInst<"svorqv[_{d}]",  "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orqv",   [IsReductionQV]>;
-def SVEORQV  : SInst<"sveorqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorqv",  [IsReductionQV]>;
-def SVADDQV  : SInst<"svaddqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addqv",  [IsReductionQV]>;
-def SVANDQV  : SInst<"svandqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andqv",  [IsReductionQV]>;
-def SVSMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "csil",         MergeNone, "aarch64_sve_smaxqv", [IsReductionQV]>;
-def SVUMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "UcUsUiUl",     MergeNone, "aarch64_sve_umaxqv", [IsReductionQV]>;
-def SVSMINQV : SInst<"svminqv[_{d}]", "{Pd", "csil",         MergeNone, "aarch64_sve_sminqv", [IsReductionQV]>;
-def SVUMINQV : SInst<"svminqv[_{d}]", "{Pd", "UcUsUiUl",     MergeNone, "aarch64_sve_uminqv", [IsReductionQV]>;
-
-def SVFADDQV   : SInst<"svaddqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_faddqv",   [IsReductionQV]>;
-def SVFMAXNMQV : SInst<"svmaxnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxnmqv", [IsReductionQV]>;
-def SVFMINNMQV : SInst<"svminnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminnmqv", [IsReductionQV]>;
-def SVFMAXQV   : SInst<"svmaxqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxqv",   [IsReductionQV]>;
-def SVFMINQV   : SInst<"svminqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_fminqv",   [IsReductionQV]>;
+let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
+def SVORQV   : SInst<"svorqv[_{d}]",  "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orqv",   [IsReductionQV, VerifyRuntimeMode]>;
+def SVEORQV  : SInst<"sveorqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorqv",  [IsReductionQV, VerifyRuntimeMode]>;
+def SVADDQV  : SInst<"svaddqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addqv",  [IsReductionQV, VerifyRuntimeMode]>;
+def SVANDQV  : SInst<"svandqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andqv",  [IsReductionQV, VerifyRuntimeMode]>;
+def SVSMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "csil",         MergeNone, "aarch64_sve_smaxqv", [IsReductionQV, VerifyRuntimeMode]>;
+def SVUMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "UcUsUiUl",     MergeNone, "aarch64_sve_umaxqv", [IsReductionQV, VerifyRuntimeMode]>;
+def SVSMINQV : SInst<"svminqv[_{d}]", "{Pd", "csil",         MergeNone, "aarch64_sve_sminqv", [IsReductionQV, VerifyRuntimeMode]>;
+def SVUMINQV : SInst<"svminqv[_{d}]", "{Pd", "UcUsUiUl",     MergeNone, "aarch64_sve_uminqv", [IsReductionQV, VerifyRuntimeMode]>;
+
+def SVFADDQV   : SInst<"svaddqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_faddqv",   [IsReductionQV, VerifyRuntimeMode]>;
+def SVFMAXNMQV : SInst<"svmaxnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxnmqv", [IsReductionQV, VerifyRuntimeMode]>;
+def SVFMINNMQV : SInst<"svminnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminnmqv", [IsReductionQV, VerifyRuntimeMode]>;
+def SVFMAXQV   : SInst<"svmaxqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxqv",   [IsReductionQV, VerifyRuntimeMode]>;
+def SVFMINQV   : SInst<"svminqv[_{d}]",   "{Pd", "hfd", MergeNone, "aarch64_sve_fminqv",   [IsReductionQV, VerifyRuntimeMode]>;
 }
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2281,17 +2285,17 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
   def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i",  MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
 }
 
-let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
+let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
   // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
-  def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [], []>;
-  def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [], []>;
-  def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq1", [], []>;
-  def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq2", [], []>;
+  def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>;
+  def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>;
+  def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq1", [VerifyRuntimeMode], []>;
+  def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq2", [VerifyRuntimeMode], []>;
   // TBLQ, TBXQ
-  def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tblq">;
-  def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tbxq">;
+  def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tblq", [VerifyRuntimeMode]>;
+  def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tbxq", [VerifyRuntimeMode]>;
   // EXTQ
-  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_extq", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
   // PMOV
   // Move to Pred
@@ -2299,23 +2303,23 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
     def _LANE      : Inst<name # "_lane[_{d}]", "Pdi", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>;
     def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>;
   }
-  defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_0>;
-  defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_1>;
-  defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_3>;
-  defm SVPMOV_D_TO_PRED : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [], ImmCheck0_7>;
+  defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_0>;
+  defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_1>;
+  defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_3>;
+  defm SVPMOV_D_TO_PRED : PMOV_TO_PRED<"svpmov", "lUl", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_7>;
 
   // Move to Vector
   multiclass PMOV_TO_VEC<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
     def _M : SInst<name # "_lane[_{d}]", "ddPi", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>;
     def _Z : SInst<name # "_{d}_z", "dP",  types, MergeNone, intrinsic # "_zeroing", flags, []>;
   }
-  def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP",  "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [], []>;
-  defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_1>;
-  defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>;
-  defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
+  def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP",  "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [VerifyRuntimeMode], []>;
+  defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [VerifyRuntimeMode], ImmCheck1_1>;
+  defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [VerifyRuntimeMode], ImmCheck1_3>;
+  defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[VerifyRuntimeMode], ImmCheck1_7>;
 }
 
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2p1" in {
+let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
   // DUPQ
   def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUcm", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>;
   def SVDUP_LANEQ_H  : SInst<"svdup_laneq[_{d}]", "ddi",  "sUsh", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
@@ -2323,7 +2327,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2p1" in {
   def SVDUP_LANEQ_D  : SInst<"svdup_laneq[_{d}]", "ddi",  "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
 }
 
-let SVETargetGuard = "sve2p1,bf16", SMETargetGuard = "sme2p1,bf16" in {
+let SVETargetGuard = "(sve2p1|sme2p1),bf16", SMETargetGuard = "(sve2p1|sme2p1),bf16" in {
   def SVDUP_LANEQ_BF16  : SInst<"svdup_laneq[_{d}]", "ddi",  "b", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
 }
 
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
index 82a4d2a8f44a0..42bc37b9ef17c 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rax1.c
@@ -1,8 +1,10 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sme2p1 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-sha3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
index bde1797235162..c9ddb324d843b 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
index 64058ef08ff97..38769aeee8b2b 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
index 3544bde378e2f..5b171087b40a2 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
@@ -1,6 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
index 4aea5890826bb..7abd5210b0be0 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_int_reduce.c
@@ -1,6 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
index 8c41b5d91cc53..5119bc085bc78 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c
@@ -1,6 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
index 455ab25a343a5..59d9bfb84b9c8 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_pred.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
index c827c6b2956ac..f9da0979b6f2f 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pmov_to_vector.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c
index 5b37c2052fe2c..863189c5051eb 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c
@@ -1,6 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
index c1a7d2d443fba..5e8007f952538 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
index f81d4c17606b3..ee44ff57cee03 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
index cfdb651e4734d..c059ee00fb8eb 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
index fd4678874cb5b..c7d2a0967d809 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
index 20e145c16196e..1d6f90fb9e514 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
index 894f27e1530b5..b900507d43259 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c
@@ -2,6 +2,12 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\
+// RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
 // RUN:   -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
index d165b92ffc8e4..6b97fa214dc7e 100644
--- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
@@ -119,11 +119,10 @@ void test(uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64)
   // expected-error at +2 {{'svpmullt_pair_n_u64' needs target feature sve,sve2,sve-aes}}
   // overload-error at +1 {{'svpmullt_pair' needs target feature sve,sve2,sve-aes}}
   SVE_ACLE_FUNC(svpmullt_pair,_n_u64,,)(svundef_u64(), u64);
-  // expected-error at +2 {{'svrax1_u64' needs target feature sve,sve2-sha3}}
-  // overload-error at +1 {{'svrax1' needs target feature sve,sve2-sha3}}
+  // expected-error at +2 {{'svrax1_u64' needs target feature (sve,sve-sha3)|(sme,sve-sha3,sme2p1)}}
+  // overload-error at +1 {{'svrax1' needs target feature (sve,sve-sha3)|(sme,sve-sha3,sme2p1)}}
   SVE_ACLE_FUNC(svrax1,_u64,,)(svundef_u64(), svundef_u64());
-
-  // expected-error at +2 {{'svrax1_s64' needs target feature sve,sve2-sha3}}
-  // overload-error at +1 {{'svrax1' needs target feature sve,sve2-sha3}}
+  // expected-error at +2 {{'svrax1_s64' needs target feature (sve,sve-sha3)|(sme,sve-sha3,sme2p1)}}
+  // overload-error at +1 {{'svrax1' needs target feature (sve,sve-sha3)|(sme,sve-sha3,sme2p1)}}
   SVE_ACLE_FUNC(svrax1,_s64,,)(svundef_s64(), svundef_s64());
 }