[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
Sam Tebbs via cfe-commits
cfe-commits at lists.llvm.org
Thu Dec 7 06:00:47 PST 2023
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/74064
>From 5fc69175acf2cd5f80853fec494a37b2603b498a Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Thu, 30 Nov 2023 13:42:50 +0000
Subject: [PATCH 1/7] [AArch64][SME] Warn when using a streaming builtin from a
non-streaming function
This PR adds a warning that's emitted when a non-streaming or
non-streaming-compatible builtin is called in an unsuitable function.
Uses work by Kerry McLaughlin.
---
clang/include/clang/Basic/CMakeLists.txt | 6 ++
.../clang/Basic/DiagnosticSemaKinds.td | 3 +
clang/include/clang/Sema/Sema.h | 1 +
clang/lib/Sema/SemaChecking.cpp | 77 +++++++++++++++----
.../Sema/aarch64-incompat-sm-builtin-calls.c | 21 +++++
clang/utils/TableGen/SveEmitter.cpp | 68 ++++++++++++++++
clang/utils/TableGen/TableGen.cpp | 9 +++
clang/utils/TableGen/TableGenBackends.h | 1 +
8 files changed, 173 insertions(+), 13 deletions(-)
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 085e316fcc671..bdd72d1d63c43 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -97,6 +97,12 @@ clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen
clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
SOURCE arm_sme.td
TARGET ClangARMSmeSemaRangeChecks)
+clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeStreamingAttrs)
+clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltinsZAState)
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
SOURCE arm_cde.td
TARGET ClangARMCdeBuiltinsDef)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203..c7036fc881a13 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3151,6 +3151,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error<
def warn_attribute_arm_sm_incompat_builtin : Warning<
"builtin call has undefined behaviour when called from a %0 function">,
InGroup<DiagGroup<"undefined-arm-streaming">>;
+def warn_attribute_arm_za_builtin_no_za_state : Warning<
+ "builtin call is not valid when calling from a function without active ZA state">,
+ InGroup<DiagGroup<"undefined-arm-za">>;
def err_sve_vector_in_non_sve_target : Error<
"SVE vector type %0 cannot be used in a target without sve">;
def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 26f4e38a46e83..8ad49b3c6f98d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13843,6 +13843,7 @@ class Sema final {
bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool ParseSVEImmChecks(CallExpr *TheCall,
SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
+ bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
bool CheckARMCoprocessorImmediate(const TargetInfo &TI, const Expr *CoprocArg,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index fc6ee6b2c5ab4..e2d717d4ff2ce 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3046,18 +3046,6 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_1:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_3:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_7:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
- HasError = true;
- break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
@@ -3168,11 +3156,71 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
<< TheCall->getSourceRange() << "streaming compatible";
return;
}
+
+ if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+ S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
+ << TheCall->getSourceRange() << "non-streaming";
+ }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+ if (FD->hasAttr<ArmNewZAAttr>())
+ return true;
+ if (const auto *T = FD->getType()->getAs<FunctionProtoType>())
+ if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+ return true;
+ return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+ }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+ if (const FunctionDecl *FD = getCurFunctionDecl()) {
+ bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
+ std::optional<ArmStreamingType> BuiltinType;
+
+ switch (BuiltinID) {
+ default:
+ break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+ }
+
+ if (BuiltinType)
+ checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
+
+ if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
+ Diag(TheCall->getBeginLoc(),
+ diag::warn_attribute_arm_za_builtin_no_za_state)
+ << TheCall->getSourceRange();
+ }
+
+ // Range check SME intrinsics that take immediate values.
+ SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
+ }
+
+ return ParseSVEImmChecks(TheCall, ImmChecks);
}
bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
// Range check SVE intrinsics that take immediate values.
- SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+ SmallVector<std::tuple<int,int,int>, 3> ImmChecks;
switch (BuiltinID) {
default:
@@ -3569,6 +3617,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
if (CheckSVEBuiltinFunctionCall(BuiltinID, TheCall))
return true;
+ if (CheckSMEBuiltinFunctionCall(BuiltinID, TheCall))
+ return true;
+
// For intrinsics which take an immediate value as part of the instruction,
// range check them here.
unsigned i = 0, l = 0, u = 0;
diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
index e77e09c443518..8f33075c7a9b5 100644
--- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
+++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
@@ -5,6 +5,7 @@
// REQUIRES: aarch64-registered-target
#include "arm_neon.h"
+#include "arm_sme_draft_spec_subject_to_change.h"
int16x8_t incompat_neon_sm(int16x8_t splat) __arm_streaming {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming function}}
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33);
}
+
+void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ svmops_za32_f32_m(0, pn, pm, zn, zm);
+}
+
+svbool_t streaming_caller_ptrue(void) __arm_streaming {
+ // expected-no-warning
+ return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4));
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b8a1fb4bab0f4..b4809db9b9ec4 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -378,6 +378,9 @@ class SVEEmitter {
/// Emit all the information needed to map builtin -> LLVM IR intrinsic.
void createSMECodeGenMap(raw_ostream &o);
+ /// Create a table for a builtin's requirement for PSTATE.SM.
+ void createStreamingAttrs(raw_ostream &o, ACLEKind Kind);
+
/// Emit all the range checks for the immediates.
void createSMERangeChecks(raw_ostream &o);
@@ -1375,6 +1378,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
"__nodebug__, __overloadable__))\n\n";
+ OS << "#ifdef __ARM_FEATURE_SME\n";
+ OS << "#define __asc __attribute__((arm_streaming_compatible))\n";
+ OS << "#else\n";
+ OS << "#define __asc\n";
+ OS << "#endif\n\n";
+
// Add reinterpret functions.
for (auto [N, Suffix] :
std::initializer_list<std::pair<unsigned, const char *>>{
@@ -1694,6 +1703,61 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "#ifdef GET_SME_STREAMING_ATTRS\n";
+ break;
+ case ACLEKind::SVE:
+ OS << "#ifdef GET_SVE_STREAMING_ATTRS\n";
+ break;
+ }
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming");
+ uint64_t IsStreamingCompatibleFlag =
+ getEnumValueForFlag("IsStreamingCompatible");
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ continue;
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "case SME::BI__builtin_sme_";
+ break;
+ case ACLEKind::SVE:
+ OS << "case SVE::BI__builtin_sve_";
+ break;
+ }
+ OS << Def->getMangledName() << ":\n";
+
+ if (Def->isFlagSet(IsStreamingFlag))
+ OS << " BuiltinType = ArmStreaming;\n";
+ else if (Def->isFlagSet(IsStreamingCompatibleFlag))
+ OS << " BuiltinType = ArmStreamingCompatible;\n";
+ else
+ OS << " BuiltinType = ArmNonStreaming;\n";
+ OS << " break;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
namespace clang {
void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createHeader(OS);
@@ -1730,4 +1794,8 @@ void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createSMERangeChecks(OS);
}
+
+void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 7efb6c731d3e5..9ba2fb07f1380 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -89,6 +89,8 @@ enum ActionType {
GenArmSmeBuiltins,
GenArmSmeBuiltinCG,
GenArmSmeRangeChecks,
+ GenArmSmeStreamingAttrs,
+ GenArmSmeBuiltinZAState,
GenArmCdeHeader,
GenArmCdeBuiltinDef,
GenArmCdeBuiltinSema,
@@ -251,6 +253,10 @@ cl::opt<ActionType> Action(
"Generate arm_sme_builtin_cg_map.inc for clang"),
clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks",
"Generate arm_sme_sema_rangechecks.inc for clang"),
+ clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs",
+ "Generate arm_sme_streaming_attrs.inc for clang"),
+ clEnumValN(GenArmSmeBuiltinZAState, "gen-arm-sme-builtin-za-state",
+ "Generate arm_sme_builtins_za_state.inc for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
@@ -500,6 +506,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSmeRangeChecks:
EmitSmeRangeChecks(Records, OS);
break;
+ case GenArmSmeStreamingAttrs:
+ EmitSmeStreamingAttrs(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index d8f447069376b..2f1c96bfa5964 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -109,6 +109,7 @@ void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
>From a0dd6bea30a030210573b176c982f31487de1c3c Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Fri, 1 Dec 2023 16:08:21 +0000
Subject: [PATCH 2/7] fixup! adjust tests
---
.../aarch64-sme-intrinsics/acle_sme_add-i32.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_add-i64.c | 16 +-
.../acle_sme_mopa-za32.c | 14 +-
.../acle_sme_mopa-za64.c | 10 +-
.../acle_sme_mops-za32.c | 14 +-
.../acle_sme_mops-za64.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_read.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_write.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_target.c | 10 +-
9 files changed, 237 insertions(+), 237 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
index e444321be41b2..ee6c1c9dd566b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
index 4b2f71d607eb6..254ea89d22c50 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index 88206eec76b97..b90c9be4a6e09 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
index 1a1ab1e00d0ec..a56ce4d17f126 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
index 1633cc6ed32e0..abcf4c2e698d7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
index b17df30c0f851..b26b9e4e51e05 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
index d63900129f994..a15599d186a87 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base);
}
@@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 3f11aa8c5eb2e..2cc338add314b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
}
@@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 2de6d9f6877f0..812ada00ceb39 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -6,20 +6,20 @@
#include <arm_sme_draft_spec_subject_to_change.h>
__attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_za8(0, 0, pg, ptr);
}
__attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
}
__attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, 0, pg, ptr);
}
-void undefined(svbool_t pg, void *ptr) {
- svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}}
+void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}}
+ svst1_ver_vnum_za64(0, 0, pg, ptr, 0);
}
>From 13974b81a5d2c795ce3f37844f16d66284ec3b05 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 11:10:50 +0000
Subject: [PATCH 3/7] fixup: add sme to target test
---
clang/lib/Sema/SemaChecking.cpp | 1 -
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c | 5 +++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e2d717d4ff2ce..1c7174854ec7a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3184,7 +3184,6 @@ static bool hasSMEZAState(unsigned BuiltinID) {
bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (const FunctionDecl *FD = getCurFunctionDecl()) {
- bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
std::optional<ArmStreamingType> BuiltinType;
switch (BuiltinID) {
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 812ada00ceb39..7cfe9fdfbd24f 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -20,6 +20,7 @@ void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, 0, pg, ptr);
}
-void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}}
- svst1_ver_vnum_za64(0, 0, pg, ptr, 0);
+__attribute__((target("+sme")))
+void undefined(svbool_t pg, void *ptr) {
+ svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}}
}
>From 9d9e13dc2c8fe2bc19b3ce22518ad58d3a96e8d0 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 11:23:53 +0000
Subject: [PATCH 4/7] fixup: use ParseSVEImmChecks in
CheckSVEBuiltinFunctionCall
---
clang/lib/Sema/SemaChecking.cpp | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1c7174854ec7a..922c8d94632bb 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3117,6 +3117,18 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheck2_4_Mul2:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
>From 27075ede887854d29c54be79f7dfafbbb7b38a2c Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 16:44:35 +0000
Subject: [PATCH 5/7] fixup: add za state tablegen generation
---
.../aarch64-sme-intrinsics/acle_sme_target.c | 8 ++--
clang/utils/TableGen/SveEmitter.cpp | 41 +++++++++++++++++++
clang/utils/TableGen/TableGen.cpp | 3 ++
clang/utils/TableGen/TableGenBackends.h | 1 +
4 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 7cfe9fdfbd24f..95bb6be2d2d34 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -6,21 +6,21 @@
#include <arm_sme_draft_spec_subject_to_change.h>
__attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za8(0, 0, pg, ptr);
}
__attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
}
__attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za16(0, 0, pg, ptr);
}
__attribute__((target("+sme")))
-void undefined(svbool_t pg, void *ptr) {
+void undefined(svbool_t pg, void *ptr) __arm_shared_za {
svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}}
}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b4809db9b9ec4..7605d343e7786 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -384,6 +384,9 @@ class SVEEmitter {
/// Emit all the range checks for the immediates.
void createSMERangeChecks(raw_ostream &o);
+ /// Create a table for a builtin's requirement for PSTATE.ZA.
+ void createBuiltinZAState(raw_ostream &OS);
+
/// Create intrinsic and add it to \p Out
void createIntrinsic(Record *R,
SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
@@ -1703,6 +1706,40 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createBuiltinZAState(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_BUILTIN_HAS_ZA_STATE\n";
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ uint64_t IsSharedZAFlag = getEnumValueForFlag("IsSharedZA");
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ continue;
+
+ OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
+ if (Def->isFlagSet(IsSharedZAFlag))
+ OS << " return true;\n";
+ else
+ OS << " return false;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
@@ -1798,4 +1835,8 @@ void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME);
}
+
+void EmitSmeBuiltinZAState(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createBuiltinZAState(OS);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 9ba2fb07f1380..89e90adf7f334 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -509,6 +509,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSmeStreamingAttrs:
EmitSmeStreamingAttrs(Records, OS);
break;
+ case GenArmSmeBuiltinZAState:
+ EmitSmeBuiltinZAState(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 2f1c96bfa5964..dabbf9d2055a7 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -110,6 +110,7 @@ void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltinZAState(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
>From 80c0cda9b88a7d14fd6184b9b6ce2a4345b71a79 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 6 Dec 2023 12:45:12 +0000
Subject: [PATCH 6/7] fixup: fix za state tests
---
clang/lib/Sema/SemaChecking.cpp | 34 ++--
.../aarch64-sme-intrinsics/acle_sme_add-i32.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_add-i64.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_ld1.c | 20 +-
.../acle_sme_ld1_vnum.c | 20 +-
.../aarch64-sme-intrinsics/acle_sme_ldr.c | 10 +-
.../acle_sme_mopa-za32.c | 14 +-
.../acle_sme_mopa-za64.c | 10 +-
.../acle_sme_mops-za32.c | 14 +-
.../acle_sme_mops-za64.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_read.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_st1.c | 20 +-
.../acle_sme_st1_vnum.c | 20 +-
.../aarch64-sme-intrinsics/acle_sme_str.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_write.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_zero.c | 8 +-
16 files changed, 303 insertions(+), 303 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 922c8d94632bb..c7de8db15247c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3046,6 +3046,23 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck2_4_Mul2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
+ SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
@@ -3117,23 +3134,6 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_1:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_3:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_7:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck2_4_Mul2:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
- SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
- HasError = true;
- break;
}
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
index ee6c1c9dd566b..55d2e355897f7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
index 254ea89d22c50..8e9c2e7da46a3 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
index 5622568c4cd76..e17782db222b6 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
@@ -22,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za8(0, slice_base, pg, ptr);
svld1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za16(0, slice_base, pg, ptr);
svld1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za32(0, slice_base, pg, ptr);
svld1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za64(0, slice_base, pg, ptr);
svld1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za128(0, slice_base, pg, ptr);
svld1_hor_za128(15, slice_base, pg, ptr);
}
@@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za8(0, slice_base, pg, ptr);
svld1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za16(0, slice_base, pg, ptr);
svld1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za32(0, slice_base, pg, ptr);
svld1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za64(0, slice_base, pg, ptr);
svld1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za128(0, slice_base, pg, ptr);
svld1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
index 0fe7dcfc0a799..0fa77e1144a7d 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
@@ -28,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -57,7 +57,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -86,7 +86,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -115,7 +115,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -142,7 +142,7 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -169,7 +169,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -198,7 +198,7 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -227,7 +227,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -256,7 +256,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -283,7 +283,7 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index 9af0778e89c5e..314c9645dd4f7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -12,7 +12,7 @@
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 0);
}
@@ -22,7 +22,7 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 15);
}
@@ -32,7 +32,7 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svldr_za(uint32_t slice_base, const void *ptr) {
+void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_za(slice_base, ptr);
}
@@ -43,7 +43,7 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]])
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) {
+void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, vnum);
}
@@ -53,6 +53,6 @@ void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum)
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 16);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index b90c9be4a6e09..e84f31c2dfa92 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
+void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
+void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
+void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
+void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
+void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
index a56ce4d17f126..1b22eb64e9e36 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
+void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
+void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
+void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
index abcf4c2e698d7..0ff97ff92f714 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
+void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
+void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
+void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
+void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
+void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
index b26b9e4e51e05..5b190a7f9b748 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
+void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
+void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
+void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
index a15599d186a87..843a96da90278 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base);
}
@@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
index 9ed158aedf7e3..98ebbefc2e74c 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
@@ -22,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za8(0, slice_base, pg, ptr);
svst1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -45,7 +45,7 @@ void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za16(0, slice_base, pg, ptr);
svst1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -68,7 +68,7 @@ void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za32(0, slice_base, pg, ptr);
svst1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -91,7 +91,7 @@ void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za64(0, slice_base, pg, ptr);
svst1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -112,7 +112,7 @@ void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za128(0, slice_base, pg, ptr);
svst1_hor_za128(15, slice_base, pg, ptr);
}
@@ -133,7 +133,7 @@ void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_str
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za8(0, slice_base, pg, ptr);
svst1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -156,7 +156,7 @@ void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za16(0, slice_base, pg, ptr);
svst1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -179,7 +179,7 @@ void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za32(0, slice_base, pg, ptr);
svst1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -202,7 +202,7 @@ void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za64(0, slice_base, pg, ptr);
svst1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -223,7 +223,7 @@ void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za128(0, slice_base, pg, ptr);
svst1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
index 627098d9365bd..938e62a15c771 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -28,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -57,7 +57,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -86,7 +86,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -115,7 +115,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -142,7 +142,7 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -169,7 +169,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -198,7 +198,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -227,7 +227,7 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -256,7 +256,7 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -283,7 +283,7 @@ void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index baadfc18563a0..bcf368bc8dce4 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -12,7 +12,7 @@
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 0);
}
@@ -22,7 +22,7 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 15);
}
@@ -32,7 +32,7 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svstr_za(uint32_t slice_base, void *ptr) {
+void test_svstr_za(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_za(slice_base, ptr);
}
@@ -43,7 +43,7 @@ void test_svstr_za(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]])
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) {
+void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, vnum);
}
@@ -53,6 +53,6 @@ void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 16);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 2cc338add314b..38c8402c3d0fa 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
}
@@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
index 750eead7c705e..8b5bd79984c21 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
@@ -18,7 +18,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za() {
+__arm_new_za void test_svzero_mask_za() {
svzero_mask_za(0);
}
@@ -34,7 +34,7 @@ void test_svzero_mask_za() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za_1() {
+__arm_new_za void test_svzero_mask_za_1() {
svzero_mask_za(176);
}
@@ -50,7 +50,7 @@ void test_svzero_mask_za_1() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za_2() {
+__arm_new_za void test_svzero_mask_za_2() {
svzero_mask_za(255);
}
@@ -66,7 +66,7 @@ void test_svzero_mask_za_2() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_za() {
+__arm_new_za void test_svzero_za() {
svzero_za();
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
>From 48ee745a815f0fda41cb1791f91d73db73e4aeba Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Thu, 7 Dec 2023 13:57:56 +0000
Subject: [PATCH 7/7] fixup: fix rebase issue
---
clang/lib/Sema/SemaChecking.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c7de8db15247c..24a0116ffe67c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3058,11 +3058,6 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
HasError = true;
break;
- case SVETypeFlags::ImmCheck2_4_Mul2:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
- SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
- HasError = true;
- break;
case SVETypeFlags::ImmCheckExtract:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
@@ -3134,6 +3129,11 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck2_4_Mul2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
+ SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
+ HasError = true;
+ break;
}
}
More information about the cfe-commits
mailing list