[clang] 6dc94c5 - [Clang][AArch64][SME] Add vector read/write (mova) intrinsics
Bryan Chan via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 20 02:57:40 PDT 2023
Author: Bryan Chan
Date: 2023-07-20T06:06:33-04:00
New Revision: 6dc94c54e51f416d03c42e7d61624419e03a3af6
URL: https://github.com/llvm/llvm-project/commit/6dc94c54e51f416d03c42e7d61624419e03a3af6
DIFF: https://github.com/llvm/llvm-project/commit/6dc94c54e51f416d03c42e7d61624419e03a3af6.diff
LOG: [Clang][AArch64][SME] Add vector read/write (mova) intrinsics
This patch adds support for the following SME ACLE intrinsics (as defined
in https://arm-software.github.io/acle/main/acle.html):
- svread_hor_za8[_s8]_m // also for u8
- svread_hor_za16[_s16]_m // also for u16, f16, bf16
- svread_hor_za32[_s32]_m // also for u32, f32
- svread_hor_za64[_s64]_m // also for u64, f64
- svread_hor_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
- svread_ver_za8[_s8]_m // also for u8
- svread_ver_za16[_s16]_m // also for u16, f16, bf16
- svread_ver_za32[_s32]_m // also for u32, f32
- svread_ver_za64[_s64]_m // also for u64, f64
- svread_ver_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
- svwrite_hor_za8[_s8]_m // also for u8
- svwrite_hor_za16[_s16]_m // also for u16, f16, bf16
- svwrite_hor_za32[_s32]_m // also for u32, f32
- svwrite_hor_za64[_s64]_m // also for u64, f64
- svwrite_hor_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
- svwrite_ver_za8[_s8]_m // also for u8
- svwrite_ver_za16[_s16]_m // also for u16, f16, bf16
- svwrite_ver_za32[_s32]_m // also for u32, f32
- svwrite_ver_za64[_s64]_m // also for u64, f64
- svwrite_ver_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
Co-authored-by: Sagar Kulkarni <sagar.kulkarni1 at huawei.com>
Reviewed By: sdesmalen, kmclaughlin
Differential Revision: https://reviews.llvm.org/D128648
Added:
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
Modified:
clang/include/clang/Basic/TargetBuiltins.h
clang/include/clang/Basic/arm_sme.td
clang/include/clang/Basic/arm_sve_sme_incl.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
clang/utils/TableGen/SveEmitter.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 3597d28a8def26..8f7881abf26f7f 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -307,6 +307,8 @@ namespace clang {
bool isTupleCreate() const { return Flags & IsTupleCreate; }
bool isTupleGet() const { return Flags & IsTupleGet; }
bool isTupleSet() const { return Flags & IsTupleSet; }
+ bool isReadZA() const { return Flags & IsReadZA; }
+ bool isWriteZA() const { return Flags & IsWriteZA; }
uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 4b174412aaffed..4f82615bfb9d95 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -72,3 +72,45 @@ defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck
defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Read horizontal/vertical ZA slices
+
+multiclass ZARead<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
+ let TargetGuard = "sme" in {
+ def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPimi", t,
+ MergeOp1, i_prefix # "_horiz",
+ [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>;
+
+ def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPimi", t,
+ MergeOp1, i_prefix # "_vert",
+ [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>;
+ }
+}
+
+defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>, ImmCheck<4, ImmCheck0_15>]>;
+defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>;
+defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>;
+defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0_0>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Write horizontal/vertical ZA slices
+
+multiclass ZAWrite<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
+ let TargetGuard = "sme" in {
+ def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimiPd", t,
+ MergeOp1, i_prefix # "_horiz",
+ [IsWriteZA, IsStreaming, IsSharedZA], ch>;
+
+ def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimiPd", t,
+ MergeOp1, i_prefix # "_vert",
+ [IsWriteZA, IsStreaming, IsSharedZA], ch>;
+ }
+}
+
+defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 6b1541d334a253..e664b8b3e3f63a 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -220,6 +220,8 @@ def IsStreaming : FlagType<0x2000000000>;
def IsStreamingCompatible : FlagType<0x4000000000>;
def IsSharedZA : FlagType<0x8000000000>;
def IsPreservesZA : FlagType<0x10000000000>;
+def IsReadZA : FlagType<0x20000000000>;
+def IsWriteZA : FlagType<0x40000000000>;
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6ab2c1d9f8480d..9d9ad162c6f5d3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9478,6 +9478,23 @@ Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, NewOps);
}
+Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ auto *VecTy = getSVEType(TypeFlags);
+ Function *F = CGM.getIntrinsic(IntID, VecTy);
+ if (TypeFlags.isReadZA()) {
+ Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
+ Ops[3] = EmitTileslice(Ops[4], Ops[3]);
+ Ops.erase(&Ops[4]);
+ } else if (TypeFlags.isWriteZA()) {
+ Ops[1] = EmitTileslice(Ops[2], Ops[1]);
+ Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy);
+ Ops.erase(&Ops[3]);
+ }
+ return Builder.CreateCall(F, Ops);
+}
+
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9936,6 +9953,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
SVETypeFlags TypeFlags(Builtin->TypeModifier);
if (TypeFlags.isLoad() || TypeFlags.isStore())
return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
+ return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
/// Should not happen
return nullptr;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index bac1844a994eef..246050b341d67b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4280,6 +4280,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
+ llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
new file mode 100644
index 00000000000000..19e2b42e13f2d6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -0,0 +1,1078 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-C-LABEL: @test_svread_hor_za8_s8(
+// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_s8u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za8_s8_1(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_s8_1u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_s16(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_s16u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_s16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_s32(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_s32u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_s32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_s64(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_s64u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_s64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za8_u8(
+// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_u8u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za8_u8_1(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_u8_1u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_u16(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_u16u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_u16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_u32(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_u32u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_u32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_u64(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_u64u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_u64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_f16(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_f16u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_f16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_bf16(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za16_bf16u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za16_bf16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_f32(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_f32u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za32_f32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_f64(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_f64u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za64_f64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_f64_1u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s8(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_s8u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s8_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_s8_1u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s16(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s16u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s16_1u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s32(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s32u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s32_1u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s64(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s64u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_s64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s64_1u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u8(
+// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_u8u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u8_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_u8_1u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u16(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u16u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u16_1u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u32(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u32u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u32_1u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u64(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u64u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_u64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u64_1u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f16(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f16u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f16_1u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_bf16(
+// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_bf16u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_bf16_1(
+// CHECK-CXX-LABEL: @_Z28test_svread_hor_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f32(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f32u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f32_1u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f64(
+// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f64u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_hor_za128_f64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f64_1u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za8_s8(
+// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_s8u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za8_s8_1(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_s8_1u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 15);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_s16(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_s16u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_s16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_s16_1u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_s32(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_s32u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_s32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_s32_1u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_s64(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_s64u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_s64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_s64_1u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za8_u8(
+// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_u8u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za8_u8_1(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_u8_1u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 15);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_u16(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_u16u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_u16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_u16_1u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_u32(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_u32u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_u32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_u32_1u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_u64(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_u64u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_u64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_u64_1u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_f16(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_f16u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_f16_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_f16_1u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_bf16(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za16_bf16u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za16_bf16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice_base, 7);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_f32(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_f32u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za32_f32_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_f32_1u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice_base, 3);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_f64(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_f64u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za64_f64_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_f64_1u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[TILESLICE]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice_base, 1);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s8(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_s8u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s8_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_s8_1u10__SVInt8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s16(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s16u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s16_1u11__SVInt16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s32(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s32u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s32_1u11__SVInt32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s64(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s64u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_s64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s64_1u11__SVInt64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u8(
+// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_u8u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u8_1(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_u8_1u11__SVUint8_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u16(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u16u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u16_1u12__SVUint16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u32(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u32u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u32_1u12__SVUint32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u64(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u64u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_u64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u64_1u12__SVUint64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f16(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f16u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f16_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f16_1u13__SVFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_bf16(
+// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_bf16u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_bf16_1(
+// CHECK-CXX-LABEL: @_Z28test_svread_ver_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD:%.*]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f32(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f32u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f32_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f32_1u13__SVFloat32_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD:%.*]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f64(
+// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f64u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base, 0);
+}
+
+// CHECK-C-LABEL: @test_svread_ver_za128_f64_1(
+// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f64_1u13__SVFloat64_tu10__SVBool_tj(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD:%.*]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+ return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base, 0);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
new file mode 100644
index 00000000000000..66e4550294ac7a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -0,0 +1,1078 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-C-LABEL: @test_svwrite_hor_za8_s8(
+// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_s8ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za8_s8_1(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_s8_1ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 15, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_s16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_s16ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_s16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_s16_1ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_s32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_s32ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_s32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_s32_1ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_s64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_s64ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_s64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_s64_1ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za8_u8(
+// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_u8ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za8_u8_1(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_u8_1ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 15, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_u16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_u16ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_u16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_u16_1ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_u32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_u32ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_u32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_u32_1ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_u64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_u64ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_u64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_u64_1ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_f16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_f16ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_f16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_f16_1ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za16_bf16ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_f32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_f32ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za32_f32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_f32_1ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_f64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_f64ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za64_f64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_f64_1ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s8(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_s8ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s8_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_s8_1ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s16ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s16_1ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s32ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s32_1ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s64ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_s64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s64_1ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u8(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_u8ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u8_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_u8_1ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u16ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u16_1ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u32ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u32_1ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u64ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_u64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u64_1ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f16ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f16_1ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_bf16ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16_1(
+// CHECK-CXX-LABEL: @_Z29test_svwrite_hor_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f32ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f32_1ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f64ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_hor_za128_f64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f64_1ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za8_s8(
+// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_s8ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za8_s8_1(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_s8_1ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 15, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_s16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_s16ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_s16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_s16_1ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_s32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_s32ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_s32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_s32_1ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_s64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_s64ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_s64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_s64_1ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za8_u8(
+// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_u8ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za8_u8_1(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_u8_1ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 15, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_u16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_u16ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_u16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_u16_1ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_u32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_u32ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_u32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_u32_1ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_u64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_u64ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_u64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_u64_1ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_f16(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_f16ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_f16_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_f16_1ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za16_bf16ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TILESLICE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice_base, 7, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_f32(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_f32ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za32_f32_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_f32_1ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TILESLICE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice_base, 3, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_f64(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_f64ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za64_f64_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_f64_1ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TILESLICE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice_base, 1, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s8(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_s8ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s8_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_s8_1ju10__SVBool_tu10__SVInt8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s16ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s16_1ju10__SVBool_tu11__SVInt16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s32ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s32_1ju10__SVBool_tu11__SVInt32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s64ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_s64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s64_1ju10__SVBool_tu11__SVInt64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u8(
+// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_u8ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u8_1(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_u8_1ju10__SVBool_tu11__SVUint8_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u16ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u16_1ju10__SVBool_tu12__SVUint16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u32ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u32_1ju10__SVBool_tu12__SVUint32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u64ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_u64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u64_1ju10__SVBool_tu12__SVUint64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f16(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f16ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f16_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f16_1ju10__SVBool_tu13__SVFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16(
+// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_bf16ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16_1(
+// CHECK-CXX-LABEL: @_Z29test_svwrite_ver_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f32(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f32ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f32_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f32_1ju10__SVBool_tu13__SVFloat32_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f64(
+// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f64ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, 0, pg, zn);
+}
+
+// CHECK-C-LABEL: @test_svwrite_ver_za128_f64_1(
+// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f64_1ju10__SVBool_tu13__SVFloat64_t(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN:%.*]])
+// CHECK-NEXT: ret void
+//
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+ SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, 0, pg, zn);
+}
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
index ccb0fec8d4c0f2..2142dbe4c3a523 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
@@ -1,7 +1,7 @@
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
@@ -12,13 +12,6 @@
#include <arm_sme_draft_spec_subject_to_change.h>
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
-
-ARM_STREAMING_ATTR
void test_range_0_0(svbool_t pg, void *ptr) {
// expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, -1, 0, pg, ptr);
@@ -36,9 +29,25 @@ void test_range_0_0(svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(0, -1, -1, pg, ptr, 1);
// expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(15, -1, 1, pg, ptr, 1);
+
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, -1, -1, 0);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 1, -1, 15);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, 0, -1, -1);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 15, -1, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(-1, -1, 0, pg, svundef_s8());
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, -1, 15, pg, svundef_s8());
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(0, -1, -1, pg, svundef_s8());
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(15, -1, 1, pg, svundef_s8());
}
-ARM_STREAMING_ATTR
void test_range_0_1(svbool_t pg, void *ptr) {
// expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, -1, 0, pg, ptr);
@@ -56,9 +65,25 @@ void test_range_0_1(svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(0, -1, -1, pg, ptr, 1);
// expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(7, -1, 2, pg, ptr, 1);
+
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, -1, -1, 0);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 2, -1, 7);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, 0, -1, -1);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 7, -1, 2);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(-1, -1, 0, pg, svundef_s16());
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, -1, 7, pg, svundef_s16());
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(0, -1, -1, pg, svundef_s64());
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(7, -1, 2, pg, svundef_s64());
}
-ARM_STREAMING_ATTR
void test_range_0_3(svbool_t pg, void *ptr) {
// expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, -1, 0, pg, ptr);
@@ -76,9 +101,25 @@ void test_range_0_3(svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(0, -1, -1, pg, ptr, 1);
// expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(3, -1, 4, pg, ptr, 1);
+
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, -1, -1, 0);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 4, -1, 3);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, 0, -1, -1);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 3, -1, 4);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(-1, -1, 0, pg, svundef_s32());
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(4, -1, 3, pg, svundef_s32());
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, -1, -1, pg, svundef_s32());
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(3, -1, 4, pg, svundef_s32());
}
-ARM_STREAMING_ATTR
void test_range_0_7(svbool_t pg, void *ptr) {
// expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, -1, 0, pg, ptr);
@@ -96,9 +137,25 @@ void test_range_0_7(svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(0, -1, -1, pg, ptr, 1);
// expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(1, -1, 8, pg, ptr, 1);
+
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, -1, -1, 0);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 8, -1, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, 0, -1, -1);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 1, -1, 8);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(-1, -1, 0, pg, svundef_s64());
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(8, -1, 1, pg, svundef_s64());
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(0, -1, -1, pg, svundef_s16());
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(1, -1, 8, pg, svundef_s16());
}
-ARM_STREAMING_ATTR
void test_range_0_15(svbool_t pg, void *ptr) {
// expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, -1, 0, pg, ptr);
@@ -116,9 +173,25 @@ void test_range_0_15(svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(0, -1, -1, pg, ptr, 1);
// expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(0, -1, 16, pg, ptr, 1);
+
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, -1, 0);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 16, -1, 0);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, -1);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, 16);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(-1, -1, 0, pg, svundef_s8());
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, -1, 0, pg, svundef_s8());
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(0, -1, -1, pg, svundef_s8());
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(0, -1, 16, pg, svundef_s8());
}
-ARM_STREAMING_ATTR
void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}}
SVE_ACLE_FUNC(svld1_ver_za16,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svld1_ver_za16' must be a constant integer}}
@@ -128,4 +201,9 @@ void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
SVE_ACLE_FUNC(svld1_ver_vnum_za16,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svld1_ver_vnum_za16' must be a constant integer}}
SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}}
SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svst1_ver_vnum_za64' must be a constant integer}}
+
+ SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, u64, u64); // expected-error-re {{argument to 'svread_hor_za8{{.*}}_m' must be a constant integer}}
+ SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}}
+ SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, u64, u64, pg, svundef_s32()); // expected-error-re {{argument to 'svwrite_hor_za32{{.*}}_m' must be a constant integer}}
+ SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}}
}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index c423e35b5e3b63..dbf5122fdf2231 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1489,6 +1489,8 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
OS << "/* Function attributes */\n";
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
+ OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
+ "__nodebug__, __overloadable__))\n\n";
OS << "#ifdef __cplusplus\n";
OS << "extern \"C\" {\n";
More information about the cfe-commits
mailing list