[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
Sam Tebbs via cfe-commits
cfe-commits at lists.llvm.org
Wed Dec 6 04:47:51 PST 2023
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/74064
>From 205471ca03ea8d266ec5f6fb7a92b8953ef183db Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Thu, 30 Nov 2023 13:42:50 +0000
Subject: [PATCH 1/6] [AArch64][SME] Warn when using a streaming builtin from a
non-streaming function
This PR adds a warning that's emitted when a non-streaming or
non-streaming-compatible builtin is called in an unsuitable function.
Uses work by Kerry McLaughlin.
---
clang/include/clang/Basic/CMakeLists.txt | 6 +
.../clang/Basic/DiagnosticSemaKinds.td | 3 +
clang/include/clang/Sema/Sema.h | 3 +
clang/lib/Sema/SemaChecking.cpp | 191 ++++++++++++++++++
.../Sema/aarch64-incompat-sm-builtin-calls.c | 21 ++
clang/utils/TableGen/SveEmitter.cpp | 68 +++++++
clang/utils/TableGen/TableGen.cpp | 9 +
clang/utils/TableGen/TableGenBackends.h | 1 +
8 files changed, 302 insertions(+)
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 085e316fcc671..bdd72d1d63c43 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -97,6 +97,12 @@ clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen
clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
SOURCE arm_sme.td
TARGET ClangARMSmeSemaRangeChecks)
+clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeStreamingAttrs)
+clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltinsZAState)
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
SOURCE arm_cde.td
TARGET ClangARMCdeBuiltinsDef)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203..c7036fc881a13 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3151,6 +3151,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error<
def warn_attribute_arm_sm_incompat_builtin : Warning<
"builtin call has undefined behaviour when called from a %0 function">,
InGroup<DiagGroup<"undefined-arm-streaming">>;
+def warn_attribute_arm_za_builtin_no_za_state : Warning<
+ "builtin call is not valid when calling from a function without active ZA state">,
+ InGroup<DiagGroup<"undefined-arm-za">>;
def err_sve_vector_in_non_sve_target : Error<
"SVE vector type %0 cannot be used in a target without sve">;
def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index e745c01ae0ae0..7413ff96f3c7b 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13840,7 +13840,10 @@ class Sema final {
bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool ParseSVEImmChecks(CallExpr *TheCall,
+ SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
bool CheckARMCoprocessorImmediate(const TargetInfo &TI, const Expr *CoprocArg,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 07ced5ffc3407..4ae148731d570 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3000,6 +3000,134 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible };
+bool Sema::ParseSVEImmChecks(
+ CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
+ // Perform all the immediate checks for this builtin call.
+ bool HasError = false;
+ for (auto &I : ImmChecks) {
+ int ArgNum, CheckTy, ElementSizeInBits;
+ std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
+
+ typedef bool (*OptionSetCheckFnTy)(int64_t Value);
+
+ // Function that checks whether the operand (ArgNum) is an immediate
+ // that is one of the predefined values.
+ auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+ int ErrDiag) -> bool {
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ llvm::APSInt Imm;
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
+ return true;
+
+ if (!CheckImm(Imm.getSExtValue()))
+ return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+ return false;
+ };
+
+ switch ((SVETypeFlags::ImmCheckType)CheckTy) {
+ case SVETypeFlags::ImmCheck0_31:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_13:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_16:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckExtract:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (2048 / ElementSizeInBits) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftRight:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftRightNarrow:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1,
+ ElementSizeInBits / 2))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftLeft:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ ElementSizeInBits - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndex:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (1 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (2 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndexDot:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (4 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckComplexRot90_270:
+ if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+ diag::err_rotation_argument_to_cadd))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckComplexRotAll90:
+ if (CheckImmediateInSet(
+ [](int64_t V) {
+ return V == 0 || V == 90 || V == 180 || V == 270;
+ },
+ diag::err_rotation_argument_to_cmla))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_0:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_15:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_255:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck2_4_Mul2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
+ SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
+ HasError = true;
+ break;
+ }
+ }
+
+ return HasError;
+}
+
static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
if (FD->hasAttr<ArmLocallyStreamingAttr>())
return ArmStreaming;
@@ -3028,6 +3156,66 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
<< TheCall->getSourceRange() << "streaming compatible";
return;
}
+
+ if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+ S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
+ << TheCall->getSourceRange() << "non-streaming";
+ }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+ if (FD->hasAttr<ArmNewZAAttr>())
+ return true;
+ if (const auto *T = FD->getType()->getAs<FunctionProtoType>())
+ if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+ return true;
+ return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+ }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+ if (const FunctionDecl *FD = getCurFunctionDecl()) {
+ bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
+ std::optional<ArmStreamingType> BuiltinType;
+
+ switch (BuiltinID) {
+ default:
+ break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+ }
+
+ if (BuiltinType)
+ checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
+
+ if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
+ Diag(TheCall->getBeginLoc(),
+ diag::warn_attribute_arm_za_builtin_no_za_state)
+ << TheCall->getSourceRange();
+ }
+
+ // Range check SME intrinsics that take immediate values.
+ SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
+ }
+
+ return ParseSVEImmChecks(TheCall, ImmChecks);
}
bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -3564,6 +3752,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
if (CheckSVEBuiltinFunctionCall(BuiltinID, TheCall))
return true;
+ if (CheckSMEBuiltinFunctionCall(BuiltinID, TheCall))
+ return true;
+
// For intrinsics which take an immediate value as part of the instruction,
// range check them here.
unsigned i = 0, l = 0, u = 0;
diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
index e77e09c443518..8f33075c7a9b5 100644
--- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
+++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
@@ -5,6 +5,7 @@
// REQUIRES: aarch64-registered-target
#include "arm_neon.h"
+#include "arm_sme_draft_spec_subject_to_change.h"
int16x8_t incompat_neon_sm(int16x8_t splat) __arm_streaming {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming function}}
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33);
}
+
+void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ svmops_za32_f32_m(0, pn, pm, zn, zm);
+}
+
+svbool_t streaming_caller_ptrue(void) __arm_streaming {
+ // expected-no-warning
+ return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4));
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b380bd9dfe664..cb02ef09e130e 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -378,6 +378,9 @@ class SVEEmitter {
/// Emit all the information needed to map builtin -> LLVM IR intrinsic.
void createSMECodeGenMap(raw_ostream &o);
+ /// Create a table for a builtin's requirement for PSTATE.SM.
+ void createStreamingAttrs(raw_ostream &o, ACLEKind Kind);
+
/// Emit all the range checks for the immediates.
void createSMERangeChecks(raw_ostream &o);
@@ -1369,6 +1372,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
"__nodebug__, __overloadable__))\n\n";
+ OS << "#ifdef __ARM_FEATURE_SME\n";
+ OS << "#define __asc __attribute__((arm_streaming_compatible))\n";
+ OS << "#else\n";
+ OS << "#define __asc\n";
+ OS << "#endif\n\n";
+
// Add reinterpret functions.
for (auto [N, Suffix] :
std::initializer_list<std::pair<unsigned, const char *>>{
@@ -1688,6 +1697,61 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "#ifdef GET_SME_STREAMING_ATTRS\n";
+ break;
+ case ACLEKind::SVE:
+ OS << "#ifdef GET_SVE_STREAMING_ATTRS\n";
+ break;
+ }
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming");
+ uint64_t IsStreamingCompatibleFlag =
+ getEnumValueForFlag("IsStreamingCompatible");
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ continue;
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "case SME::BI__builtin_sme_";
+ break;
+ case ACLEKind::SVE:
+ OS << "case SVE::BI__builtin_sve_";
+ break;
+ }
+ OS << Def->getMangledName() << ":\n";
+
+ if (Def->isFlagSet(IsStreamingFlag))
+ OS << " BuiltinType = ArmStreaming;\n";
+ else if (Def->isFlagSet(IsStreamingCompatibleFlag))
+ OS << " BuiltinType = ArmStreamingCompatible;\n";
+ else
+ OS << " BuiltinType = ArmNonStreaming;\n";
+ OS << " break;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
namespace clang {
void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createHeader(OS);
@@ -1724,4 +1788,8 @@ void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createSMERangeChecks(OS);
}
+
+void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 7efb6c731d3e5..9ba2fb07f1380 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -89,6 +89,8 @@ enum ActionType {
GenArmSmeBuiltins,
GenArmSmeBuiltinCG,
GenArmSmeRangeChecks,
+ GenArmSmeStreamingAttrs,
+ GenArmSmeBuiltinZAState,
GenArmCdeHeader,
GenArmCdeBuiltinDef,
GenArmCdeBuiltinSema,
@@ -251,6 +253,10 @@ cl::opt<ActionType> Action(
"Generate arm_sme_builtin_cg_map.inc for clang"),
clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks",
"Generate arm_sme_sema_rangechecks.inc for clang"),
+ clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs",
+ "Generate arm_sme_streaming_attrs.inc for clang"),
+ clEnumValN(GenArmSmeBuiltinZAState, "gen-arm-sme-builtin-za-state",
+ "Generate arm_sme_builtins_za_state.inc for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
@@ -500,6 +506,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSmeRangeChecks:
EmitSmeRangeChecks(Records, OS);
break;
+ case GenArmSmeStreamingAttrs:
+ EmitSmeStreamingAttrs(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index d8f447069376b..2f1c96bfa5964 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -109,6 +109,7 @@ void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
>From 5fd4f2c38cecb07b88c2b1ba02b1e328491516c4 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Fri, 1 Dec 2023 16:08:21 +0000
Subject: [PATCH 2/6] fixup! adjust tests
---
.../aarch64-sme-intrinsics/acle_sme_add-i32.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_add-i64.c | 16 +-
.../acle_sme_mopa-za32.c | 14 +-
.../acle_sme_mopa-za64.c | 10 +-
.../acle_sme_mops-za32.c | 14 +-
.../acle_sme_mops-za64.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_read.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_write.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_target.c | 10 +-
9 files changed, 237 insertions(+), 237 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
index e444321be41b2..ee6c1c9dd566b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
index 4b2f71d607eb6..254ea89d22c50 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index 88206eec76b97..b90c9be4a6e09 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
index 1a1ab1e00d0ec..a56ce4d17f126 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
index 1633cc6ed32e0..abcf4c2e698d7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
index b17df30c0f851..b26b9e4e51e05 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
index d63900129f994..a15599d186a87 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base);
}
@@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 3f11aa8c5eb2e..2cc338add314b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
}
@@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 2de6d9f6877f0..812ada00ceb39 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -6,20 +6,20 @@
#include <arm_sme_draft_spec_subject_to_change.h>
__attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_za8(0, 0, pg, ptr);
}
__attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
}
__attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, 0, pg, ptr);
}
-void undefined(svbool_t pg, void *ptr) {
- svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}}
+void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}}
+ svst1_ver_vnum_za64(0, 0, pg, ptr, 0);
}
>From 3065dc6cf7d251af0e58413a6fe5f15056198759 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 11:10:50 +0000
Subject: [PATCH 3/6] fixup: add sme to target test
---
clang/lib/Sema/SemaChecking.cpp | 1 -
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c | 5 +++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 4ae148731d570..f263ffffe8679 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3184,7 +3184,6 @@ static bool hasSMEZAState(unsigned BuiltinID) {
bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (const FunctionDecl *FD = getCurFunctionDecl()) {
- bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
std::optional<ArmStreamingType> BuiltinType;
switch (BuiltinID) {
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 812ada00ceb39..7cfe9fdfbd24f 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -20,6 +20,7 @@ void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, 0, pg, ptr);
}
-void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}}
- svst1_ver_vnum_za64(0, 0, pg, ptr, 0);
+__attribute__((target("+sme")))
+void undefined(svbool_t pg, void *ptr) {
+ svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}}
}
>From 72dc89fc88a99c27d975e18b4a738ae7840c3389 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 11:23:53 +0000
Subject: [PATCH 4/6] fixup: use ParseSVEImmChecks in
CheckSVEBuiltinFunctionCall
---
clang/lib/Sema/SemaChecking.cpp | 148 +++-----------------------------
1 file changed, 13 insertions(+), 135 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f263ffffe8679..ed8954171c0c5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3117,6 +3117,18 @@ bool Sema::ParseSVEImmChecks(
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck1_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
+ HasError = true;
+ break;
case SVETypeFlags::ImmCheck2_4_Mul2:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
@@ -3233,141 +3245,7 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
}
// Perform all the immediate checks for this builtin call.
- bool HasError = false;
- for (auto &I : ImmChecks) {
- int ArgNum, CheckTy, ElementSizeInBits;
- std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
-
- typedef bool(*OptionSetCheckFnTy)(int64_t Value);
-
- // Function that checks whether the operand (ArgNum) is an immediate
- // that is one of the predefined values.
- auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
- int ErrDiag) -> bool {
- // We can't check the value of a dependent argument.
- Expr *Arg = TheCall->getArg(ArgNum);
- if (Arg->isTypeDependent() || Arg->isValueDependent())
- return false;
-
- // Check constant-ness first.
- llvm::APSInt Imm;
- if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
- return true;
-
- if (!CheckImm(Imm.getSExtValue()))
- return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
- return false;
- };
-
- switch ((SVETypeFlags::ImmCheckType)CheckTy) {
- case SVETypeFlags::ImmCheck0_31:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_13:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_16:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_7:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_1:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_3:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck1_7:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckExtract:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
- (2048 / ElementSizeInBits) - 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckShiftRight:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckShiftRightNarrow:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1,
- ElementSizeInBits / 2))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckShiftLeft:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
- ElementSizeInBits - 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckLaneIndex:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
- (128 / (1 * ElementSizeInBits)) - 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckLaneIndexCompRotate:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
- (128 / (2 * ElementSizeInBits)) - 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckLaneIndexDot:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
- (128 / (4 * ElementSizeInBits)) - 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckComplexRot90_270:
- if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
- diag::err_rotation_argument_to_cadd))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheckComplexRotAll90:
- if (CheckImmediateInSet(
- [](int64_t V) {
- return V == 0 || V == 90 || V == 180 || V == 270;
- },
- diag::err_rotation_argument_to_cmla))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_1:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_2:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_3:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_0:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_15:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck0_255:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
- HasError = true;
- break;
- case SVETypeFlags::ImmCheck2_4_Mul2:
- if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
- SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
- HasError = true;
- break;
- }
- }
-
- return HasError;
+ return ParseSVEImmChecks(TheCall, ImmChecks);
}
bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
>From 03a686f83ab214a0692f8808462dcbd996928099 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 5 Dec 2023 16:44:35 +0000
Subject: [PATCH 5/6] fixup: add za state tablegen generation
---
.../aarch64-sme-intrinsics/acle_sme_target.c | 8 ++--
clang/utils/TableGen/SveEmitter.cpp | 41 +++++++++++++++++++
clang/utils/TableGen/TableGen.cpp | 3 ++
clang/utils/TableGen/TableGenBackends.h | 1 +
4 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 7cfe9fdfbd24f..95bb6be2d2d34 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -6,21 +6,21 @@
#include <arm_sme_draft_spec_subject_to_change.h>
__attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za8(0, 0, pg, ptr);
}
__attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
}
__attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za16(0, 0, pg, ptr);
}
__attribute__((target("+sme")))
-void undefined(svbool_t pg, void *ptr) {
+void undefined(svbool_t pg, void *ptr) __arm_shared_za {
svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}}
}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index cb02ef09e130e..646c91198039a 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -384,6 +384,9 @@ class SVEEmitter {
/// Emit all the range checks for the immediates.
void createSMERangeChecks(raw_ostream &o);
+ /// Create a table for a builtin's requirement for PSTATE.ZA.
+ void createBuiltinZAState(raw_ostream &OS);
+
/// Create intrinsic and add it to \p Out
void createIntrinsic(Record *R,
SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
@@ -1697,6 +1700,40 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createBuiltinZAState(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_BUILTIN_HAS_ZA_STATE\n";
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ uint64_t IsSharedZAFlag = getEnumValueForFlag("IsSharedZA");
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ continue;
+
+ OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
+ if (Def->isFlagSet(IsSharedZAFlag))
+ OS << " return true;\n";
+ else
+ OS << " return false;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
@@ -1792,4 +1829,8 @@ void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME);
}
+
+void EmitSmeBuiltinZAState(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createBuiltinZAState(OS);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 9ba2fb07f1380..89e90adf7f334 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -509,6 +509,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSmeStreamingAttrs:
EmitSmeStreamingAttrs(Records, OS);
break;
+ case GenArmSmeBuiltinZAState:
+ EmitSmeBuiltinZAState(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 2f1c96bfa5964..dabbf9d2055a7 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -110,6 +110,7 @@ void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltinZAState(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
>From c54c02865140e4838ca6c46eec96b0e15b4b04fd Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Wed, 6 Dec 2023 12:45:12 +0000
Subject: [PATCH 6/6] fixup: fix za state tests
---
.../aarch64-sme-intrinsics/acle_sme_add-i32.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_add-i64.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_ld1.c | 20 +-
.../acle_sme_ld1_vnum.c | 20 +-
.../aarch64-sme-intrinsics/acle_sme_ldr.c | 10 +-
.../acle_sme_mopa-za32.c | 14 +-
.../acle_sme_mopa-za64.c | 10 +-
.../acle_sme_mops-za32.c | 14 +-
.../acle_sme_mops-za64.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_read.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_st1.c | 20 +-
.../acle_sme_st1_vnum.c | 20 +-
.../aarch64-sme-intrinsics/acle_sme_str.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_write.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_zero.c | 8 +-
15 files changed, 286 insertions(+), 286 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
index ee6c1c9dd566b..55d2e355897f7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
+void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
+void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
index 254ea89d22c50..8e9c2e7da46a3 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
+void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
+void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
index 5622568c4cd76..e17782db222b6 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
@@ -22,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za8(0, slice_base, pg, ptr);
svld1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za16(0, slice_base, pg, ptr);
svld1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za32(0, slice_base, pg, ptr);
svld1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za64(0, slice_base, pg, ptr);
svld1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_hor_za128(0, slice_base, pg, ptr);
svld1_hor_za128(15, slice_base, pg, ptr);
}
@@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za8(0, slice_base, pg, ptr);
svld1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za16(0, slice_base, pg, ptr);
svld1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za32(0, slice_base, pg, ptr);
svld1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za64(0, slice_base, pg, ptr);
svld1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
+void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
svld1_ver_za128(0, slice_base, pg, ptr);
svld1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
index 0fe7dcfc0a799..0fa77e1144a7d 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
@@ -28,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -57,7 +57,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -86,7 +86,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -115,7 +115,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -142,7 +142,7 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -169,7 +169,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -198,7 +198,7 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -227,7 +227,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -256,7 +256,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -283,7 +283,7 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
+void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index 9af0778e89c5e..314c9645dd4f7 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -12,7 +12,7 @@
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 0);
}
@@ -22,7 +22,7 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 15);
}
@@ -32,7 +32,7 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svldr_za(uint32_t slice_base, const void *ptr) {
+void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_za(slice_base, ptr);
}
@@ -43,7 +43,7 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]])
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) {
+void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, vnum);
}
@@ -53,6 +53,6 @@ void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum)
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
// CHECK-NEXT: ret void
//
-void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) {
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_shared_za {
svldr_vnum_za(slice_base, ptr, 16);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index b90c9be4a6e09..e84f31c2dfa92 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
+void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
+void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
+void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
+void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
+void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
index a56ce4d17f126..1b22eb64e9e36 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
+void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
+void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
+void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
index abcf4c2e698d7..0ff97ff92f714 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
+void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
+void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
+void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
+void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
+void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
+void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
index b26b9e4e51e05..5b190a7f9b748 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
+void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
+void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
+void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
+void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
index a15599d186a87..843a96da90278 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base);
}
@@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
index 9ed158aedf7e3..98ebbefc2e74c 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
@@ -22,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za8(0, slice_base, pg, ptr);
svst1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -45,7 +45,7 @@ void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za16(0, slice_base, pg, ptr);
svst1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -68,7 +68,7 @@ void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za32(0, slice_base, pg, ptr);
svst1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -91,7 +91,7 @@ void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za64(0, slice_base, pg, ptr);
svst1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -112,7 +112,7 @@ void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_hor_za128(0, slice_base, pg, ptr);
svst1_hor_za128(15, slice_base, pg, ptr);
}
@@ -133,7 +133,7 @@ void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_str
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za8(0, slice_base, pg, ptr);
svst1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -156,7 +156,7 @@ void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za16(0, slice_base, pg, ptr);
svst1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -179,7 +179,7 @@ void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za32(0, slice_base, pg, ptr);
svst1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -202,7 +202,7 @@ void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za64(0, slice_base, pg, ptr);
svst1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -223,7 +223,7 @@ void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
+void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
svst1_ver_za128(0, slice_base, pg, ptr);
svst1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
index 627098d9365bd..938e62a15c771 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -28,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -57,7 +57,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -86,7 +86,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -115,7 +115,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -142,7 +142,7 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -169,7 +169,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -198,7 +198,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -227,7 +227,7 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -256,7 +256,7 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -283,7 +283,7 @@ void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za {
svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index baadfc18563a0..bcf368bc8dce4 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -12,7 +12,7 @@
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 0);
}
@@ -22,7 +22,7 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 15);
}
@@ -32,7 +32,7 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
// CHECK-NEXT: ret void
//
-void test_svstr_za(uint32_t slice_base, void *ptr) {
+void test_svstr_za(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_za(slice_base, ptr);
}
@@ -43,7 +43,7 @@ void test_svstr_za(uint32_t slice_base, void *ptr) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]])
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) {
+void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, vnum);
}
@@ -53,6 +53,6 @@ void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) {
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
// CHECK-NEXT: ret void
//
-void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) {
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_shared_za {
svstr_vnum_za(slice_base, ptr, 16);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 2cc338add314b..38c8402c3d0fa 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
}
@@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
index 750eead7c705e..8b5bd79984c21 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c
@@ -18,7 +18,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za() {
+__arm_new_za void test_svzero_mask_za() {
svzero_mask_za(0);
}
@@ -34,7 +34,7 @@ void test_svzero_mask_za() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za_1() {
+__arm_new_za void test_svzero_mask_za_1() {
svzero_mask_za(176);
}
@@ -50,7 +50,7 @@ void test_svzero_mask_za_1() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_mask_za_2() {
+__arm_new_za void test_svzero_mask_za_2() {
svzero_mask_za(255);
}
@@ -66,7 +66,7 @@ void test_svzero_mask_za_2() {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255)
// CHECK-CXX-NEXT: ret void
//
-void test_svzero_za() {
+__arm_new_za void test_svzero_za() {
svzero_za();
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
More information about the cfe-commits
mailing list